import pandas as pd
import numpy as np
import igraph as ig
import collections
import matplotlib.pyplot as plt
%matplotlib inline
# Load the unweighted edge list (columns: node_start, node_end, status) and display it.
df_unweighted = pd.read_csv('combined_final_unweighted.csv')
df_unweighted
| node_start | node_end | status | |
|---|---|---|---|
| 0 | Hong Kong | Samoa | jurisdiction |
| 1 | Hong Kong | Samoa | jurisdiction |
| 2 | Hong Kong | Samoa | jurisdiction |
| 3 | Hong Kong | Samoa | jurisdiction |
| 4 | Hong Kong | Samoa | jurisdiction |
| ... | ... | ... | ... |
| 338127 | Indonesia | British Virgin Islands | officer_base |
| 338128 | Cook Islands | British Virgin Islands | officer_base |
| 338129 | Hong Kong | British Virgin Islands | officer_base |
| 338130 | Cook Islands | United Kingdom | officer_base |
| 338131 | Hong Kong | United Kingdom | officer_base |
338132 rows × 3 columns
# Load the weighted edge list (columns: node_start, node_end, status, weight) and display it.
df_weighted = pd.read_csv('combined_final_weighted.csv')
df_weighted
| node_start | node_end | status | weight | |
|---|---|---|---|---|
| 0 | Hong Kong | Samoa | jurisdiction | 2.0 |
| 1 | Hong Kong | Samoa | jurisdiction | 2.0 |
| 2 | Hong Kong | Samoa | jurisdiction | 2.0 |
| 3 | Hong Kong | Samoa | jurisdiction | 2.0 |
| 4 | Hong Kong | Samoa | jurisdiction | 2.0 |
| ... | ... | ... | ... | ... |
| 338119 | Indonesia | British Virgin Islands | officer_base | 1.0 |
| 338120 | Cook Islands | British Virgin Islands | officer_base | 1.0 |
| 338121 | Hong Kong | British Virgin Islands | officer_base | 1.0 |
| 338122 | Cook Islands | United Kingdom | officer_base | 1.0 |
| 338123 | Hong Kong | United Kingdom | officer_base | 1.0 |
338124 rows × 4 columns
# Distinct values of the status column; later filters must match these strings exactly.
df_weighted['status'].unique()
array(['jurisdiction', 'intermediary_base', 'intermediary_jurisdiction',
'officer_base', 'officer_jurisdiction'], dtype=object)
# Load the grouped edge list (columns: node_start, node_end, weight) and display it.
# NOTE(review): presumably one aggregated weight per (node_start, node_end) pair — confirm
# against the script that produced this file.
df_weighted_grouped = pd.read_csv('combined_final_weighted_groupedby.csv')
df_weighted_grouped
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Albania | Bahamas | 4.0 |
| 1 | Algeria | British Virgin Islands | 2.0 |
| 2 | Algeria | United Kingdom | 2.0 |
| 3 | Algeria | United States | 1.0 |
| 4 | Andorra | Bahamas | 52.0 |
| ... | ... | ... | ... |
| 2835 | Zimbabwe | Hong Kong | 3.0 |
| 2836 | Zimbabwe | Jersey | 1.0 |
| 2837 | Zimbabwe | Panama | 6.0 |
| 2838 | Zimbabwe | Russia | 2.0 |
| 2839 | Zimbabwe | United States | 1.0 |
2840 rows × 3 columns
# Summary statistics of the numeric columns (only weight here).
df_weighted_grouped.describe()
| weight | |
|---|---|
| count | 2840.000000 |
| mean | 194.889613 |
| std | 1729.454201 |
| min | 0.500000 |
| 25% | 2.000000 |
| 50% | 5.000000 |
| 75% | 24.000000 |
| max | 65366.500000 |
# df_jur_grouped_list = df_jur_grouped.values.tolist()
# df_jur_grouped_list
# d = collections.defaultdict(int)
# for n1, n2, v in df_jur_grouped_list:
# d[min(n1, n2), max(n1, n2)] += v
# result = [[k[0], k[1], v] for k, v in d.items()]
# df_net = pd.DataFrame(result)
# df_net
# df_net = df_net.rename(columns = {0:'node_1', 1:'node_2', 2: 'weight'})
# df_net
#df_net.to_csv('undirected_weighted_grouped.csv', index = False)
# df_net.describe()
def get_diameter(community):
    """Return the diameter of each community's subgraph.

    Args:
        community: Clustering-like object whose ``subgraphs()`` method
            returns one graph per community; each graph must provide
            ``diameter()``.

    Returns:
        list: One diameter per community, in ``subgraphs()`` order.
    """
    # subgraphs() is invoked exactly once instead of once per community
    # plus once more for the length.
    return [subgraph.diameter() for subgraph in community.subgraphs()]
def get_transitivity(community):
    """Return the undirected transitivity of each community's subgraph.

    Args:
        community: Clustering-like object whose ``subgraphs()`` method
            returns one graph per community; each graph must provide
            ``transitivity_undirected()``.

    Returns:
        list: One transitivity value per community, in ``subgraphs()``
        order. Values are whatever ``transitivity_undirected()`` returns
        (the recorded runs show ``nan`` for some communities).
    """
    # subgraphs() is invoked exactly once instead of once per community
    # plus once more for the length.
    return [
        subgraph.transitivity_undirected()
        for subgraph in community.subgraphs()
    ]
def get_avg_degree(community):
    """Return the mean vertex degree of each community's subgraph.

    Args:
        community: Clustering-like object whose ``subgraphs()`` method
            returns one graph per community; each graph must provide
            ``degree()`` returning the per-vertex degrees.

    Returns:
        list: ``np.mean`` of the degrees, one value per community, in
        ``subgraphs()`` order.
    """
    # subgraphs() is invoked exactly once instead of once per community
    # plus once more for the length.
    return [np.mean(subgraph.degree()) for subgraph in community.subgraphs()]
def get_avg_weighted_degree(community):
    """Return the mean weighted degree (strength) of each community's subgraph.

    Args:
        community: Clustering-like object whose ``subgraphs()`` method
            returns one graph per community; each graph must provide
            ``strength(weights=...)`` and carry a ``'weight'`` edge
            attribute.

    Returns:
        list: ``np.mean`` of the per-vertex strengths, one value per
        community, in ``subgraphs()`` order.
    """
    # subgraphs() is invoked exactly once instead of once per community
    # plus once more for the length.
    return [
        np.mean(subgraph.strength(weights='weight'))
        for subgraph in community.subgraphs()
    ]
# Build a directed graph from the grouped edge list. Each tuple is
# (node_start, node_end, weight): the first two fields name the endpoints and
# the third is stored as the edge attribute 'weight'.
# edge_attrs lists the attribute names in column order after the endpoints.
g = ig.Graph.TupleList(
    df_weighted_grouped.itertuples(index=False),
    directed=True,
    edge_attrs=['weight'],
)
ig.plot(g)
g.diameter()
4
# Global clustering coefficient of g; per the method name, edge direction is presumably ignored.
print( "Clustering Coefficient: ", g.transitivity_undirected())
Clustering Coefficient: 0.3859593515919117
# Mean vertex degree of g (degree() called with its default mode).
np.mean(g.degree())
28.979591836734695
# Mean weighted degree (strength) over all vertices of g, using the 'weight' edge attribute.
np.mean(g.strength(weights = 'weight'))
5647.821428571428
# Weighted degree of every vertex of g, ranked from largest to smallest;
# preview the ten strongest.
df_strength = (
    pd.DataFrame(
        {
            'name': list(g.vs['name']),
            'weighted_degree': g.strength(weights='weight'),
        }
    )
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength.head(10)
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 287863.5 |
| 1 | Hong Kong | 113074.5 |
| 2 | Panama | 109901.0 |
| 3 | Switzerland | 77214.5 |
| 4 | Bahamas | 40701.5 |
| 5 | United Kingdom | 38073.5 |
| 6 | Taiwan | 34930.5 |
| 7 | Seychelles | 32602.0 |
| 8 | Jersey | 29869.5 |
| 9 | China | 25281.0 |
# Re-run the cell that builds df_strength first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_strength['weighted_degree'].plot()
plt.title('Weighted Degree Distribution (All Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted Degree')
Text(0, 0.5, 'Weighted Degree')
# Incoming weighted degree of every vertex of g, ranked from largest to
# smallest; preview the top ten.
df_indegree = (
    pd.DataFrame(
        {
            'name': list(g.vs['name']),
            'weighted_indegree': g.strength(weights='weight', mode='in'),
        }
    )
    .sort_values('weighted_indegree', ascending=False)
    .reset_index(drop=True)
)
df_indegree.head(10)
| name | weighted_indegree | |
|---|---|---|
| 0 | British Virgin Islands | 280925.0 |
| 1 | Panama | 89885.0 |
| 2 | Seychelles | 31501.5 |
| 3 | Bahamas | 31085.0 |
| 4 | Niue | 18912.0 |
| 5 | United Kingdom | 16320.0 |
| 6 | Hong Kong | 14145.0 |
| 7 | Samoa | 12807.0 |
| 8 | Russia | 10958.0 |
| 9 | British Anguilla | 6480.0 |
# Re-run the cell that builds df_indegree first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_indegree['weighted_indegree'].plot()
plt.title('Weighted In Degree Distribution (All Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted In Degree')
Text(0, 0.5, 'Weighted In Degree')
# Outgoing weighted degree of every vertex of g, ranked from largest to
# smallest; preview the top ten.
df_outdegree = (
    pd.DataFrame(
        {
            'name': list(g.vs['name']),
            'weighted_outdegree': g.strength(weights='weight', mode='out'),
        }
    )
    .sort_values('weighted_outdegree', ascending=False)
    .reset_index(drop=True)
)
df_outdegree.head(10)
| name | weighted_outdegree | |
|---|---|---|
| 0 | Hong Kong | 98929.5 |
| 1 | Switzerland | 76988.5 |
| 2 | Taiwan | 34922.5 |
| 3 | Jersey | 29443.5 |
| 4 | China | 22683.0 |
| 5 | Luxembourg | 21769.5 |
| 6 | United Kingdom | 21753.5 |
| 7 | Panama | 20016.0 |
| 8 | Singapore | 17426.0 |
| 9 | United States | 16818.0 |
# Re-run the cell that builds df_outdegree first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_outdegree['weighted_outdegree'].plot()
plt.title('Weighted Out Degree Distribution (All Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted Out Degree')
Text(0, 0.5, 'Weighted Out Degree')
# Weighted PageRank of every vertex of g, ranked from largest to smallest;
# preview the top ten.
df_pagerank = (
    pd.DataFrame(
        {
            'name': list(g.vs['name']),
            'pagerank': g.pagerank(weights='weight'),
        }
    )
    .sort_values('pagerank', ascending=False)
    .reset_index(drop=True)
)
df_pagerank.head(10)
| name | pagerank | |
|---|---|---|
| 0 | British Virgin Islands | 0.265045 |
| 1 | Panama | 0.127240 |
| 2 | Barbados | 0.080715 |
| 3 | Bahamas | 0.050056 |
| 4 | Hong Kong | 0.046778 |
| 5 | United Kingdom | 0.046458 |
| 6 | Seychelles | 0.037662 |
| 7 | Russia | 0.032252 |
| 8 | Niue | 0.028310 |
| 9 | Bermuda | 0.016432 |
# Re-run the cell that builds df_pagerank first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_pagerank['pagerank'].plot()
plt.title('Pagerank Distribution (All Relationships)')
plt.xlabel('Countries')
plt.ylabel('Pagerank')
Text(0, 0.5, 'Pagerank')
# Weighted authority score of every vertex of g, ranked from largest to
# smallest; preview the top ten.
df_authorities = (
    pd.DataFrame(
        {
            'name': list(g.vs['name']),
            'authority_score': g.authority_score(weights='weight'),
        }
    )
    .sort_values('authority_score', ascending=False)
    .reset_index(drop=True)
)
df_authorities.head(10)
| name | authority_score | |
|---|---|---|
| 0 | British Virgin Islands | 1.000000 |
| 1 | Panama | 0.203256 |
| 2 | Seychelles | 0.102670 |
| 3 | Bahamas | 0.068544 |
| 4 | Samoa | 0.063190 |
| 5 | Niue | 0.055153 |
| 6 | United Kingdom | 0.041456 |
| 7 | Russia | 0.028038 |
| 8 | British Anguilla | 0.026740 |
| 9 | Hong Kong | 0.016538 |
# Re-run the cell that builds df_authorities first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_authorities['authority_score'].plot()
plt.title('Authority Score Distribution (All Relationships)')
plt.xlabel('Countries')
plt.ylabel('Authority Score')
Text(0, 0.5, 'Authority Score')
# Palette of 26 named colours used for visualization; the plotting cells
# look a vertex's colour up by its community id.
colors = ['blue', 'yellow', 'green','purple', 'pink', 'orange', 'lightcoral', 'plum', 'darkgreen', 'darkblue', 'rosybrown',
'crimson', 'steelblue', 'khaki', 'moccasin', 'orangered', 'fuchsia', 'thistle', 'salmon', 'chocolate', 'lightseagreen',
'springgreen', 'darkseagreen', 'deeppink', 'mediumvioletred','olive']
# Detect communities on g with label propagation, using 'weight' as the edge weights.
comm_lp= g.community_label_propagation(weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_lp, mark_groups = True, layout="drl")
# Weighted modularity of this partition on g.
modularity = g.modularity(comm_lp, weights='weight')
print(modularity)
0.0012263425368253737
# Per-community metrics for comm_lp: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_lp))
print('average degree: ', get_avg_degree(comm_lp))
print('transitivity: ', get_transitivity(comm_lp))
print('diameter: ', get_diameter(comm_lp))
average weighted degree: [0.0, 13304.311688311689, 0.0, 0.0, 84.0, 0.0, 0.0, 18.0, 0.0, 0.0, 230.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 46.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.6657988605818809, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 4, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Detect communities on g with the leading-eigenvector method, using 'weight' as the edge weights.
# NOTE(review): igraph emits a RuntimeWarning that this method was developed for
# undirected graphs, while g is built with directed=True — interpret the result with care.
comm_leading_eigenvector= g.community_leading_eigenvector(weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_leading_eigenvector, mark_groups = True, layout="drl")
/Users/susankoruthu/opt/anaconda3/lib/python3.8/site-packages/igraph/__init__.py:1281: RuntimeWarning: This method was developed for undirected graphs at src/community/leading_eigenvector.c:530 membership, _, q = GraphBase.community_leading_eigenvector(
# Weighted modularity of the leading-eigenvector partition on g.
modularity = g.modularity(comm_leading_eigenvector, weights='weight')
print(modularity)
0.1503749727369289
# Per-community metrics for comm_leading_eigenvector: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_leading_eigenvector))
print('average degree: ', get_avg_degree(comm_leading_eigenvector))
print('transitivity: ', get_transitivity(comm_leading_eigenvector))
print('diameter: ', get_diameter(comm_leading_eigenvector))
average weighted degree: [3447.0862068965516, 4977.735042735043, 0.85, 0.0] average degree: [8.172413793103448, 24.666666666666668, 0.2, 0.0] transitivity: [0.2783573806881243, 0.45566413318856314, nan, nan] diameter: [3, 3, 1, 0]
# Table mapping each vertex name to its leading-eigenvector community id.
df_leading_eigenvector = pd.DataFrame(
    {
        'name': list(g.vs['name']),
        'community': comm_leading_eigenvector.membership,
    }
)
df_leading_eigenvector
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Algeria | 1 |
| 3 | British Virgin Islands | 1 |
| 4 | United Kingdom | 1 |
| ... | ... | ... |
| 191 | Venezuela | 0 |
| 192 | Vietnam | 1 |
| 193 | Yemen | 0 |
| 194 | Zambia | 1 |
| 195 | Zimbabwe | 1 |
196 rows × 2 columns
# Summary statistics of the community ids (max + 1 gives the number of communities).
df_leading_eigenvector.describe()
| community | |
|---|---|
| count | 196.000000 |
| mean | 0.816327 |
| std | 0.621775 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 1.000000 |
| 75% | 1.000000 |
| max | 3.000000 |
# Save the name/community table to CSV without the index column.
df_leading_eigenvector.to_csv('community_detection_csvs_13apr/comm_le_all.csv', index = False)
## Plot the leading-eigenvector communities on the full graph.
# One colour per vertex, selected by community id. The palette index wraps
# around so a clustering with more communities than palette entries reuses
# colours instead of raising IndexError.
vertex_colors = [
    colors[community_id % len(colors)]
    for community_id in comm_leading_eigenvector.membership
]

visual_style = {
    # Canvas size and margin
    "bbox": (5000, 5000),
    "margin": 17,
    # Vertex colours follow community membership
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the weight; +1 offsets log(1) == 0
    "edge_width": np.log(g.es['weight']) + 1,
    # Nearly transparent edges
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Draw straight edges
    "edge_curved": False,
}

# Fruchterman-Reingold layout
my_layout = g.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g, 'directed_images/communities_leading_eigenvector.png', vertex_label=g.vs['name'], **visual_style)

# Same styling on a circular layout
my_layout = g.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g, 'directed_images/leading_vector_circular.png', vertex_label=g.vs['name'], **visual_style)
# Render every leading-eigenvector community as its own image.
# These settings are the same for every community, so set them once.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is called once here rather than several times per iteration.
for i, subgraph in enumerate(comm_leading_eigenvector.subgraphs()):
    # Edge width grows with the log of the weight
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    # Lay out this community on its own
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(
        subgraph,
        'directed_images/leading_eigenvector_subgraph' + str(i) + '.png',
        vertex_label=subgraph.vs['name'],
        **visual_style
    )
# Detect communities on g with the spinglass method, using 'weight' as the edge weights.
comm_spinglass = g.community_spinglass(weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_spinglass, mark_groups = True, layout="drl")
# Weighted modularity of this partition on g.
modularity = g.modularity(comm_spinglass, weights='weight')
print(modularity)
0.05390156643282913
# Per-community metrics for comm_spinglass: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_spinglass))
print('average degree: ', get_avg_degree(comm_spinglass))
print('transitivity: ', get_transitivity(comm_spinglass))
print('diameter: ', get_diameter(comm_spinglass))
average weighted degree: [2071.3529411764707, 1465.2631578947369, 2549.5555555555557, 313.6, 845.0, 118.26666666666667, 1084.054054054054, 230.0] average degree: [3.823529411764706, 4.947368421052632, 15.955555555555556, 2.0, 1.0, 2.4, 3.4594594594594597, 1.0] transitivity: [0.1836734693877551, 0.39, 0.5931061806656102, 0.0, nan, 0.1111111111111111, 0.05736677115987461, nan] diameter: [4, 3, 3, 1, 1, 3, 3, 1]
# Table mapping each vertex name to its spinglass community id.
df_spinglass = pd.DataFrame(
    {
        'name': list(g.vs['name']),
        'community': comm_spinglass.membership,
    }
)
df_spinglass
| name | community | |
|---|---|---|
| 0 | Albania | 1 |
| 1 | Bahamas | 1 |
| 2 | Algeria | 2 |
| 3 | British Virgin Islands | 6 |
| 4 | United Kingdom | 2 |
| ... | ... | ... |
| 191 | Venezuela | 0 |
| 192 | Vietnam | 2 |
| 193 | Yemen | 0 |
| 194 | Zambia | 6 |
| 195 | Zimbabwe | 6 |
196 rows × 2 columns
# Summary statistics of the community ids (max + 1 gives the number of communities).
df_spinglass.describe()
| community | |
|---|---|
| count | 196.000000 |
| mean | 3.392857 |
| std | 2.435580 |
| min | 0.000000 |
| 25% | 1.000000 |
| 50% | 2.500000 |
| 75% | 6.000000 |
| max | 7.000000 |
## Plot the spinglass communities on the full graph.
# One colour per vertex, selected by community id. The palette index wraps
# around so a clustering with more communities than palette entries reuses
# colours instead of raising IndexError.
vertex_colors = [
    colors[community_id % len(colors)]
    for community_id in comm_spinglass.membership
]

visual_style = {
    # Canvas size and margin
    "bbox": (5000, 5000),
    "margin": 17,
    # Vertex colours follow community membership
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the weight; +1 offsets log(1) == 0
    "edge_width": np.log(g.es['weight']) + 1,
    # Nearly transparent edges
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Draw straight edges
    "edge_curved": False,
}

# Fruchterman-Reingold layout
my_layout = g.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g, 'directed_images/communities_spinglass.png', vertex_label=g.vs['name'], **visual_style)

# Same styling on a circular layout
my_layout = g.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g, 'directed_images/spinglass_circular.png', vertex_label=g.vs['name'], **visual_style)
# Render every spinglass community as its own image.
# These settings are the same for every community, so set them once.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is called once here rather than several times per iteration.
for i, subgraph in enumerate(comm_spinglass.subgraphs()):
    # Edge width grows with the log of the weight
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    # Lay out this community on its own
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(
        subgraph,
        'directed_images/spinglass_subgraph' + str(i) + '.png',
        vertex_label=subgraph.vs['name'],
        **visual_style
    )
# Run walktrap community detection on g with 2-step walks, using 'weight' as the edge weights.
walktrap = g.community_walktrap(weights = 'weight', steps = 2)
# Convert the walktrap result into a flat clustering.
comm_walktrap = walktrap.as_clustering()
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_walktrap, mark_groups = True, layout="drl")
# Weighted modularity of this partition on g.
modularity = g.modularity(comm_walktrap, weights='weight')
print(modularity)
0.002050025384918412
# Per-community metrics for comm_walktrap: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_walktrap))
print('average degree: ', get_avg_degree(comm_walktrap))
print('transitivity: ', get_transitivity(comm_walktrap))
print('diameter: ', get_diameter(comm_walktrap))
average weighted degree: [0.0, 7984.430656934306, 0.0, 0.0, 143.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 36.90510948905109, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.44463040899562895, nan, nan, 0.0, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Table mapping each vertex name to its walktrap community id.
df_walktrap = pd.DataFrame(
    {
        'name': list(g.vs['name']),
        'community': comm_walktrap.membership,
    }
)
df_walktrap
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 1 |
| 2 | Algeria | 2 |
| 3 | British Virgin Islands | 1 |
| 4 | United Kingdom | 1 |
| ... | ... | ... |
| 191 | Venezuela | 1 |
| 192 | Vietnam | 1 |
| 193 | Yemen | 51 |
| 194 | Zambia | 52 |
| 195 | Zimbabwe | 1 |
196 rows × 2 columns
# Summary statistics of the community ids (max + 1 gives the number of communities).
df_walktrap.describe()
| community | |
|---|---|
| count | 196.000000 |
| mean | 7.867347 |
| std | 13.652566 |
| min | 0.000000 |
| 25% | 1.000000 |
| 50% | 1.000000 |
| 75% | 4.000000 |
| max | 52.000000 |
# ##plot community
# visual_style = {}
# vertex_colors = []
# for i in range(len(comm_walktrap.membership)):
# vertex_colors.append(colors[comm_walktrap.membership[i]])
# # Set bbox and margin
# visual_style["bbox"] = (5000,5000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = vertex_colors
# # Set edge width
# visual_style["edge_width"] = np.log(g.es['weight']) + 1
# # Set edge color
# visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# # Set vertex size
# visual_style["vertex_size"] = 15
# # Set vertex label size
# visual_style["vertex_label_size"] = 20
# # Set vertex label color
# visual_style["vertex_label_color"] = 'red'
# # Don't curve the edges
# visual_style["edge_curved"] = False
# # Set the layout
# my_layout = g.layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g,'directed_images/communities_walktrap.png', vertex_label = g.vs['name'] , **visual_style)
# # Set the layout
# my_layout = g.layout_circle()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g,'directed_images/walktrap_circular.png', vertex_label = g.vs['name'] , **visual_style)
# for i in range(len(comm_walktrap.subgraphs())):
# visual_style["bbox"] = (2000,2000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = 'green'
# # Set edge width
# visual_style["edge_width"] = np.log(comm_walktrap.subgraphs()[i].es['weight']) + 1
# # Set the layout
# my_layout = comm_walktrap.subgraphs()[i].layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# ig.plot(comm_walktrap.subgraphs()[i],'directed_images/walktrap_subgraph' + str(i) + '.png' ,vertex_label = comm_walktrap.subgraphs()[i].vs['name'],**visual_style )
# Detect communities on g with Infomap, using 'weight' as the edge weights.
# In the recorded run this yields a single community covering all vertices (modularity 0.0).
comm_infomap = g.community_infomap(edge_weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_infomap, mark_groups = True, layout="drl")
# Weighted modularity of this partition on g.
modularity = g.modularity(comm_infomap, weights='weight')
print(modularity)
0.0
# Per-community metrics for comm_infomap: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_infomap))
print('average degree: ', get_avg_degree(comm_infomap))
print('transitivity: ', get_transitivity(comm_infomap))
print('diameter: ', get_diameter(comm_infomap))
average weighted degree: [5647.821428571428] average degree: [28.979591836734695] transitivity: [0.3859593515919117] diameter: [4]
# Table mapping each vertex name to its Infomap community id.
df_infomap = pd.DataFrame(
    {
        'name': list(g.vs['name']),
        'community': comm_infomap.membership,
    }
)
df_infomap
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Algeria | 0 |
| 3 | British Virgin Islands | 0 |
| 4 | United Kingdom | 0 |
| ... | ... | ... |
| 191 | Venezuela | 0 |
| 192 | Vietnam | 0 |
| 193 | Yemen | 0 |
| 194 | Zambia | 0 |
| 195 | Zimbabwe | 0 |
196 rows × 2 columns
# Distinct values of the status column; the jurisdiction filter below must use these exact strings.
df_weighted['status'].unique()
array(['jurisdiction', 'intermediary_base', 'intermediary_jurisdiction',
'officer_base', 'officer_jurisdiction'], dtype=object)
# Keep only the jurisdiction-type relationships. Each label must match a value
# of df_weighted['status'] exactly (note the underscore in
# 'intermediary_jurisdiction'); a label that matches nothing is silently
# ignored by isin().
jurisdiction_list = ['jurisdiction', 'officer_jurisdiction', 'intermediary_jurisdiction']
df_weighted_jur = df_weighted[df_weighted['status'].isin(jurisdiction_list)]
# The status column is no longer needed once the rows are filtered.
df_jur = df_weighted_jur.drop(['status'], axis = 1)
df_jur
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Hong Kong | Samoa | 2.0 |
| 1 | Hong Kong | Samoa | 2.0 |
| 2 | Hong Kong | Samoa | 2.0 |
| 3 | Hong Kong | Samoa | 2.0 |
| 4 | Hong Kong | Samoa | 2.0 |
| ... | ... | ... | ... |
| 221136 | Indonesia | Samoa | 0.5 |
| 221137 | Cayman Islands | Samoa | 0.5 |
| 221138 | British Virgin Islands | Samoa | 0.5 |
| 221139 | China | Samoa | 0.5 |
| 221140 | China | British Virgin Islands | 0.5 |
216674 rows × 3 columns
# Collapse parallel edges: sum the weights of all rows sharing the same
# (node_start, node_end) pair, keeping both keys as ordinary columns.
df_jur_grouped = df_jur.groupby(['node_start','node_end'], as_index = False).sum()
df_jur_grouped
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Albania | Bahamas | 4.0 |
| 1 | Andorra | Bahamas | 52.0 |
| 2 | Andorra | British Virgin Islands | 78.0 |
| 3 | Andorra | Cayman Islands | 2.0 |
| 4 | Andorra | Costa Rica | 6.0 |
| ... | ... | ... | ... |
| 1222 | Vietnam | Panama | 2.0 |
| 1223 | Yemen | Panama | 2.0 |
| 1224 | Zambia | British Virgin Islands | 4.0 |
| 1225 | Zimbabwe | British Virgin Islands | 10.0 |
| 1226 | Zimbabwe | Panama | 6.0 |
1227 rows × 3 columns
# df_jur_grouped_list = df_jur_grouped.values.tolist()
# df_jur_grouped_list
# d = collections.defaultdict(int)
# for n1, n2, v in df_jur_grouped_list:
# d[min(n1, n2), max(n1, n2)] += v
# result = [[k[0], k[1], v] for k, v in d.items()]
# df_net = pd.DataFrame(result)
# df_net
# df_net = df_net.rename(columns = {0:'node_1', 1:'node_2', 2: 'weight'})
# df_net
#df_net.to_csv('undirected_weighted_grouped.csv', index = False)
# df_net.describe()
# Build a directed graph from the grouped jurisdiction edge list. Each tuple is
# (node_start, node_end, weight): the first two fields name the endpoints and
# the third is stored as the edge attribute 'weight'.
# edge_attrs lists the attribute names in column order after the endpoints.
g_aj = ig.Graph.TupleList(
    df_jur_grouped.itertuples(index=False),
    directed=True,
    edge_attrs=['weight'],
)
ig.plot(g_aj)
g_aj.diameter()
5
# Global clustering coefficient of g_aj; per the method name, edge direction is presumably ignored.
print( "Clustering Coefficient: ", g_aj.transitivity_undirected())
Clustering Coefficient: 0.24745484400656814
# Mean vertex degree of g_aj (degree() called with its default mode).
np.mean(g_aj.degree())
14.10344827586207
# Mean weighted degree (strength) over all vertices of g_aj, using the 'weight' edge attribute.
np.mean(g_aj.strength(weights = 'weight'))
4979.028735632184
# Weighted degree of every vertex of g_aj, ranked from largest to smallest;
# preview the ten strongest.
df_strength = (
    pd.DataFrame(
        {
            'name': list(g_aj.vs['name']),
            'weighted_degree': g_aj.strength(weights='weight'),
        }
    )
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength.head(10)
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 232588.5 |
| 1 | Panama | 104014.0 |
| 2 | Hong Kong | 78953.0 |
| 3 | Switzerland | 76304.0 |
| 4 | Bahamas | 40402.0 |
| 5 | Seychelles | 30510.5 |
| 6 | Jersey | 29128.5 |
| 7 | Luxembourg | 21730.0 |
| 8 | United Kingdom | 20585.0 |
| 9 | Niue | 19422.0 |
# Re-run the cell that builds df_strength first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_strength['weighted_degree'].plot()
plt.title('Weighted Degree Distribution (All Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted Degree')
Text(0, 0.5, 'Weighted Degree')
# Incoming weighted degree of every vertex of g_aj, ranked from largest to
# smallest; preview the top ten.
df_indegree = (
    pd.DataFrame(
        {
            'name': list(g_aj.vs['name']),
            'weighted_indegree': g_aj.strength(weights='weight', mode='in'),
        }
    )
    .sort_values('weighted_indegree', ascending=False)
    .reset_index(drop=True)
)
df_indegree.head(10)
| name | weighted_indegree | |
|---|---|---|
| 0 | British Virgin Islands | 229763.0 |
| 1 | Panama | 84486.0 |
| 2 | Bahamas | 31064.0 |
| 3 | Seychelles | 29840.5 |
| 4 | Niue | 18912.0 |
| 5 | Samoa | 12478.5 |
| 6 | British Anguilla | 6480.0 |
| 7 | Barbados | 4722.0 |
| 8 | Bermuda | 4662.0 |
| 9 | Cayman Islands | 2868.0 |
# Re-run the cell that builds df_indegree first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_indegree['weighted_indegree'].plot()
plt.title('Weighted In Degree Distribution (All Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted In Degree')
Text(0, 0.5, 'Weighted In Degree')
# Outgoing weighted degree of every vertex of g_aj, ranked from largest to
# smallest; preview the top ten.
df_outdegree = (
    pd.DataFrame(
        {
            'name': list(g_aj.vs['name']),
            'weighted_outdegree': g_aj.strength(weights='weight', mode='out'),
        }
    )
    .sort_values('weighted_outdegree', ascending=False)
    .reset_index(drop=True)
)
df_outdegree.head(10)
| name | weighted_outdegree | |
|---|---|---|
| 0 | Hong Kong | 77920.0 |
| 1 | Switzerland | 76304.0 |
| 2 | Jersey | 29056.5 |
| 3 | Luxembourg | 21728.0 |
| 4 | United Kingdom | 20391.0 |
| 5 | Panama | 19528.0 |
| 6 | Guernsey | 14800.0 |
| 7 | United Arab Emirates | 14609.5 |
| 8 | Isle of Man | 9988.0 |
| 9 | United States | 9796.5 |
# Re-run the cell that builds df_outdegree first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_outdegree['weighted_outdegree'].plot()
plt.title('Weighted Out Degree Distribution (All Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted Out Degree')
Text(0, 0.5, 'Weighted Out Degree')
# Weighted PageRank of every vertex of g_aj, ranked from largest to smallest;
# preview the top ten.
df_pagerank = (
    pd.DataFrame(
        {
            'name': list(g_aj.vs['name']),
            'pagerank': g_aj.pagerank(weights='weight'),
        }
    )
    .sort_values('pagerank', ascending=False)
    .reset_index(drop=True)
)
df_pagerank.head(10)
| name | pagerank | |
|---|---|---|
| 0 | British Virgin Islands | 0.241395 |
| 1 | Barbados | 0.173128 |
| 2 | Panama | 0.166709 |
| 3 | Bahamas | 0.063761 |
| 4 | Seychelles | 0.040720 |
| 5 | Bermuda | 0.034410 |
| 6 | Niue | 0.030839 |
| 7 | British Anguilla | 0.022724 |
| 8 | Cayman Islands | 0.019970 |
| 9 | Samoa | 0.016921 |
# Re-run the cell that builds df_pagerank first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_pagerank['pagerank'].plot()
plt.title('Pagerank Distribution (All Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Pagerank')
Text(0, 0.5, 'Pagerank')
# Weighted authority score of every vertex of g_aj, ranked from largest to
# smallest; preview the top ten.
df_authorities = (
    pd.DataFrame(
        {
            'name': list(g_aj.vs['name']),
            'authority_score': g_aj.authority_score(weights='weight'),
        }
    )
    .sort_values('authority_score', ascending=False)
    .reset_index(drop=True)
)
df_authorities.head(10)
| name | authority_score | |
|---|---|---|
| 0 | British Virgin Islands | 1.000000 |
| 1 | Panama | 0.243957 |
| 2 | Seychelles | 0.112570 |
| 3 | Bahamas | 0.082946 |
| 4 | Samoa | 0.068108 |
| 5 | Niue | 0.066423 |
| 6 | British Anguilla | 0.029361 |
| 7 | Bermuda | 0.011364 |
| 8 | Cayman Islands | 0.006435 |
| 9 | Nevada | 0.004324 |
# Re-run the cell that builds df_authorities first: the name is reused for other graphs in this notebook.
# The x-axis is the row position after sorting (0 = largest value), one point per vertex.
df_authorities['authority_score'].plot()
plt.title('Authority Score Distribution (All Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Authority Score')
Text(0, 0.5, 'Authority Score')
# Detect communities on g_aj with label propagation, using 'weight' as the edge weights.
comm_lp_aj= g_aj.community_label_propagation(weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_lp_aj, mark_groups = True, layout="drl")
# Weighted modularity of this partition on g_aj.
modularity = g_aj.modularity(comm_lp_aj, weights='weight')
print(modularity)
0.014257837515459181
# Per-community metrics for comm_lp_aj: each helper returns one list entry per community.
print('average weighted degree: ', get_avg_weighted_degree(comm_lp_aj))
print('average degree: ', get_avg_degree(comm_lp_aj))
print('transitivity: ', get_transitivity(comm_lp_aj))
print('diameter: ', get_diameter(comm_lp_aj))
average weighted degree: [0.0, 19364.363636363636, 30.0, 436.2352941176471, 148.0, 0.0, 0.0, 165.33333333333334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 14.818181818181818, 1.0, 5.176470588235294, 4.5, 0.0, 0.0, 1.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.7441300421432872, nan, 0.2608695652173913, 0.5806451612903226, nan, nan, 0.0, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 5, 1, 3, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Detect communities on g_aj with the leading-eigenvector method, using 'weight' as the edge weights.
# NOTE(review): igraph emits a RuntimeWarning that this method was developed for
# undirected graphs, while g_aj is built with directed=True — interpret the result with care.
comm_leading_eigenvector_aj= g_aj.community_leading_eigenvector(weights = 'weight')
# Draw the clustering with layout "drl" and community groups marked.
ig.plot(comm_leading_eigenvector_aj, mark_groups = True, layout="drl")
/Users/susankoruthu/opt/anaconda3/lib/python3.8/site-packages/igraph/__init__.py:1281: RuntimeWarning: This method was developed for undirected graphs at src/community/leading_eigenvector.c:530 membership, _, q = GraphBase.community_leading_eigenvector(
# Weighted modularity of the leading-eigenvector partition on g_aj.
modularity = g_aj.modularity(comm_leading_eigenvector_aj, weights='weight')
print(modularity)
0.1392852852192658
# Per-community summary statistics for the leading-eigenvector partition of g_aj.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_leading_eigenvector_aj))
average weighted degree: [1810.1782178217823, 5338.041095890411] average degree: [5.326732673267327, 9.643835616438356] transitivity: [0.1493288590604027, 0.3371588523442967] diameter: [3, 4]
# One row per vertex of g_aj: its name and the id of the community it was assigned to.
df_leading_eigenvector_aj= pd.DataFrame({'name': list(g_aj.vs['name']), 'community':comm_leading_eigenvector_aj.membership } )
df_leading_eigenvector_aj
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Andorra | 0 |
| 3 | British Virgin Islands | 1 |
| 4 | Cayman Islands | 1 |
| ... | ... | ... |
| 169 | Venezuela | 0 |
| 170 | Vietnam | 1 |
| 171 | Yemen | 0 |
| 172 | Zambia | 1 |
| 173 | Zimbabwe | 0 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_leading_eigenvector_aj.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 0.419540 |
| std | 0.494908 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 0.000000 |
| 75% | 1.000000 |
| max | 1.000000 |
## Plot the leading-eigenvector communities of g_aj.
# One colour per vertex, looked up in the notebook-level `colors` palette by
# the id of the community the vertex belongs to.
vertex_colors = [
    colors[community]
    for community in comm_leading_eigenvector_aj.membership
]

# Styling shared by both whole-graph pictures.
visual_style = {
    "bbox": (5000, 5000),
    "margin": 17,
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the edge weight (plus 1).
    "edge_width": np.log(g_aj.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Straight edges
    "edge_curved": False,
}

# Whole graph, Fruchterman-Reingold layout.
my_layout = g_aj.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g_aj, 'directed_all_jur_images/communities_leading_eigenvector.png', vertex_label=g_aj.vs['name'], **visual_style)

# Whole graph again, circular layout.
my_layout = g_aj.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g_aj, 'directed_all_jur_images/leading_vector_circular.png', vertex_label=g_aj.vs['name'], **visual_style)

# One picture per community. These three settings are the same for every
# subgraph, so they are set once before the loop.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is evaluated a single time here; the previous version called it
# for the loop bound and again for every attribute access inside the loop.
for i, subgraph in enumerate(comm_leading_eigenvector_aj.subgraphs()):
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(subgraph, 'directed_all_jur_images/leading_eigenvector_subgraph' + str(i) + '.png', vertex_label=subgraph.vs['name'], **visual_style)
# Partition g_aj with the spinglass method, using the edge 'weight' attribute.
# NOTE(review): spinglass is presumably stochastic (simulated annealing), so
# the partition may differ between runs unless the RNG is seeded — confirm.
comm_spinglass_aj = g_aj.community_spinglass(weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_spinglass_aj, mark_groups = True, layout="drl")
# Weighted modularity of the spinglass partition of g_aj.
modularity = g_aj.modularity(comm_spinglass_aj, weights='weight')
print(modularity)
0.13569283780013383
# Per-community summary statistics for the spinglass partition of g_aj.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_spinglass_aj))
average weighted degree: [2847.6545454545453, 105.11111111111111, 844.0, 3140.0, 1567.4871794871794, 659.0] average degree: [3.4909090909090907, 2.111111111111111, 1.0, 3.7, 5.384615384615385, 2.5] transitivity: [0.09441805225653206, 0.0, nan, 0.12274024738344434, 0.30280373831775703, 0.11392405063291139] diameter: [6, 1, 1, 2, 4, 3]
# One row per vertex of g_aj: its name and the id of its spinglass community.
df_spinglass_aj = pd.DataFrame({'name': list(g_aj.vs['name']), 'community':comm_spinglass_aj.membership } )
df_spinglass_aj
| name | community | |
|---|---|---|
| 0 | Albania | 5 |
| 1 | Bahamas | 5 |
| 2 | Andorra | 3 |
| 3 | British Virgin Islands | 0 |
| 4 | Cayman Islands | 4 |
| ... | ... | ... |
| 169 | Venezuela | 3 |
| 170 | Vietnam | 4 |
| 171 | Yemen | 3 |
| 172 | Zambia | 0 |
| 173 | Zimbabwe | 0 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_spinglass_aj.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 2.287356 |
| std | 1.861742 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 3.000000 |
| 75% | 4.000000 |
| max | 5.000000 |
## Plot the spinglass communities of g_aj.
# One colour per vertex, looked up in the notebook-level `colors` palette by
# the id of the community the vertex belongs to.
vertex_colors = [
    colors[community]
    for community in comm_spinglass_aj.membership
]

# Styling shared by both whole-graph pictures.
visual_style = {
    "bbox": (5000, 5000),
    "margin": 17,
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the edge weight (plus 1).
    "edge_width": np.log(g_aj.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Straight edges
    "edge_curved": False,
}

# Whole graph, Fruchterman-Reingold layout.
my_layout = g_aj.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g_aj, 'directed_all_jur_images/communities_spinglass.png', vertex_label=g_aj.vs['name'], **visual_style)

# Whole graph again, circular layout.
my_layout = g_aj.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g_aj, 'directed_all_jur_images/spinglass_circular.png', vertex_label=g_aj.vs['name'], **visual_style)

# One picture per community. These three settings are the same for every
# subgraph, so they are set once before the loop.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is evaluated a single time here; the previous version called it
# for the loop bound and again for every attribute access inside the loop.
for i, subgraph in enumerate(comm_spinglass_aj.subgraphs()):
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(subgraph, 'directed_all_jur_images/spinglass_subgraph' + str(i) + '.png', vertex_label=subgraph.vs['name'], **visual_style)
# Walktrap community detection on g_aj with weighted random walks of length 2.
walktrap_aj = g_aj.community_walktrap(weights = 'weight', steps = 2)
# Turn the walktrap result into a flat clustering; no cluster count is passed,
# so the cut level is whatever as_clustering() picks by default.
comm_walktrap_aj = walktrap_aj.as_clustering()
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_walktrap_aj, mark_groups = True, layout="drl")
# Weighted modularity of the walktrap partition of g_aj.
modularity = g_aj.modularity(comm_walktrap_aj, weights='weight')
print(modularity)
0.003886750362990465
# Per-community summary statistics for the walktrap partition of g_aj.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_walktrap_aj))
average weighted degree: [0.0, 6545.106060606061, 0.0, 0.0, 0.0, 0.0, 844.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 17.545454545454547, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.27739790086251687, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# One row per vertex of g_aj: its name and the id of its walktrap community.
df_walktrap_aj = pd.DataFrame({'name': list(g_aj.vs['name']), 'community':comm_walktrap_aj.membership } )
df_walktrap_aj
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 1 |
| 2 | Andorra | 1 |
| 3 | British Virgin Islands | 1 |
| 4 | Cayman Islands | 1 |
| ... | ... | ... |
| 169 | Venezuela | 1 |
| 170 | Vietnam | 1 |
| 171 | Yemen | 40 |
| 172 | Zambia | 41 |
| 173 | Zimbabwe | 1 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_walktrap_aj.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 5.735632 |
| std | 10.274791 |
| min | 0.000000 |
| 25% | 1.000000 |
| 50% | 1.000000 |
| 75% | 1.000000 |
| max | 41.000000 |
# ##plot community
# visual_style = {}
# vertex_colors = []
# for i in range(len(comm_walktrap_aj.membership)):
# vertex_colors.append(colors[comm_walktrap_aj.membership[i]])
# # Set bbox and margin
# visual_style["bbox"] = (5000,5000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = vertex_colors
# # Set edge width
# visual_style["edge_width"] = np.log(g_aj.es['weight']) + 1
# # Set edge color
# visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# # Set vertex size
# visual_style["vertex_size"] = 15
# # Set vertex label size
# visual_style["vertex_label_size"] = 20
# # Set vertex label color
# visual_style["vertex_label_color"] = 'red'
# # Don't curve the edges
# visual_style["edge_curved"] = False
# # Set the layout
# my_layout = g_aj.layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_aj,'directed_all_jur_images/communities_walktrap.png', vertex_label = g_aj.vs['name'] , **visual_style)
# # Set the layout
# my_layout = g_aj.layout_circle()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_aj,'directed_all_jur_images/walktrap_circular.png', vertex_label = g_aj.vs['name'] , **visual_style)
# for i in range(len(comm_walktrap_aj.subgraphs())):
# visual_style["bbox"] = (2000,2000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = 'green'
# # Set edge width
# visual_style["edge_width"] = np.log(comm_walktrap_aj.subgraphs()[i].es['weight']) + 1
# # Set the layout
# my_layout = comm_walktrap_aj.subgraphs()[i].layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# ig.plot(comm_walktrap_aj.subgraphs()[i],'directed_all_jur_images/walktrap_subgraph' + str(i) + '.png' ,vertex_label = comm_walktrap_aj.subgraphs()[i].vs['name'],**visual_style )
# Infomap community detection on g_aj, weighting edges by the 'weight' attribute.
comm_infomap_aj = g_aj.community_infomap(edge_weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_infomap_aj, mark_groups = True, layout="drl")
# Weighted modularity of the infomap partition of g_aj. In the recorded run
# every vertex ended up in community 0, which is why the value printed is 0.0.
modularity = g_aj.modularity(comm_infomap_aj, weights='weight')
print(modularity)
0.0
# Per-community summary statistics for the infomap partition of g_aj.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_infomap_aj))
average weighted degree: [4979.028735632184] average degree: [14.10344827586207] transitivity: [0.24745484400656814] diameter: [5]
# One row per vertex of g_aj: its name and the id of its infomap community.
df_infomap_aj = pd.DataFrame({'name': list(g_aj.vs['name']), 'community':comm_infomap_aj.membership } )
df_infomap_aj
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Andorra | 0 |
| 3 | British Virgin Islands | 0 |
| 4 | Cayman Islands | 0 |
| ... | ... | ... |
| 169 | Venezuela | 0 |
| 170 | Vietnam | 0 |
| 171 | Yemen | 0 |
| 172 | Zambia | 0 |
| 173 | Zimbabwe | 0 |
174 rows × 2 columns
# List the distinct relationship types present in the weighted edge list.
df_weighted['status'].unique()
array(['jurisdiction', 'intermediary_base', 'intermediary_jurisdiction',
'officer_base', 'officer_jurisdiction'], dtype=object)
# Restrict the weighted edge list to rows whose status is 'jurisdiction', then
# discard the status column, which is constant after the filter.
jurisdiction_list = ['jurisdiction']
df_weighted_jur = df_weighted.loc[df_weighted['status'].isin(jurisdiction_list)]
df_jur = df_weighted_jur.drop(columns=['status'])
df_jur
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Hong Kong | Samoa | 2.0 |
| 1 | Hong Kong | Samoa | 2.0 |
| 2 | Hong Kong | Samoa | 2.0 |
| 3 | Hong Kong | Samoa | 2.0 |
| 4 | Hong Kong | Samoa | 2.0 |
| ... | ... | ... | ... |
| 216554 | British Virgin Islands | Cook Islands | 2.0 |
| 216555 | British Virgin Islands | Cook Islands | 2.0 |
| 216556 | British Virgin Islands | Cook Islands | 2.0 |
| 216557 | British Virgin Islands | Cook Islands | 2.0 |
| 216558 | Vanuatu | Cook Islands | 2.0 |
216559 rows × 3 columns
# Collapse repeated (node_start, node_end) rows into a single row per ordered
# pair, summing the remaining columns (the edge weight).
df_jur_grouped = df_jur.groupby(['node_start','node_end'], as_index = False).sum()
df_jur_grouped
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Albania | Bahamas | 4.0 |
| 1 | Andorra | Bahamas | 52.0 |
| 2 | Andorra | British Virgin Islands | 78.0 |
| 3 | Andorra | Cayman Islands | 2.0 |
| 4 | Andorra | Costa Rica | 6.0 |
| ... | ... | ... | ... |
| 1212 | Vietnam | Panama | 2.0 |
| 1213 | Yemen | Panama | 2.0 |
| 1214 | Zambia | British Virgin Islands | 4.0 |
| 1215 | Zimbabwe | British Virgin Islands | 10.0 |
| 1216 | Zimbabwe | Panama | 6.0 |
1217 rows × 3 columns
# df_jur_grouped_list = df_jur_grouped.values.tolist()
# df_jur_grouped_list
# d = collections.defaultdict(int)
# for n1, n2, v in df_jur_grouped_list:
# d[min(n1, n2), max(n1, n2)] += v
# result = [[k[0], k[1], v] for k, v in d.items()]
# df_net = pd.DataFrame(result)
# df_net
# df_net = df_net.rename(columns = {0:'node_1', 1:'node_2', 2: 'weight'})
# df_net
#df_net.to_csv('undirected_weighted_grouped.csv', index = False)
# df_net.describe()
# Build the directed jurisdiction-only graph from the grouped edge list.
# Each row is (node_start, node_end, weight): the first two fields name the
# endpoints and the third is stored as the edge attribute 'weight'.
# edge_attrs takes the attribute names in column order; the earlier dict form
# only worked because iterating a dict yields its keys.
g_j = ig.Graph.TupleList(
    df_jur_grouped.itertuples(index=False),
    directed=True,
    edge_attrs=['weight'],
)
ig.plot(g_j)
# Diameter of g_j; no weights are passed, so path length is counted in hops.
g_j.diameter()
5
# Global clustering coefficient of g_j as returned by transitivity_undirected().
print( "Clustering Coefficient: ", g_j.transitivity_undirected())
Clustering Coefficient: 0.24672505151604357
# Mean of the per-vertex degrees of g_j (unweighted edge counts).
np.mean(g_j.degree())
13.988505747126437
# Mean of the per-vertex strengths of g_j, i.e. degrees weighted by the 'weight' edge attribute.
np.mean(g_j.strength(weights = 'weight'))
4978.367816091954
# Weighted degree (strength) of every vertex of g_j, ranked from highest to
# lowest and re-indexed 0..n-1 so the index is the rank.
df_strength = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'weighted_degree': g_j.strength(weights='weight'),
    })
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 232540.0 |
| 1 | Panama | 104014.0 |
| 2 | Hong Kong | 78946.0 |
| 3 | Switzerland | 76304.0 |
| 4 | Bahamas | 40402.0 |
| ... | ... | ... |
| 169 | Malawi | 2.0 |
| 170 | Nauru | 2.0 |
| 171 | Sudan | 2.0 |
| 172 | Saint Martin (French part) | 2.0 |
| 173 | Guam | 2.0 |
174 rows × 2 columns
# Rebuild the ranked weighted-degree table for g_j (same computation as the
# previous cell) and show only the ten highest-ranked vertices.
df_strength = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'weighted_degree': g_j.strength(weights='weight'),
    })
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength.head(10)
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 232540.0 |
| 1 | Panama | 104014.0 |
| 2 | Hong Kong | 78946.0 |
| 3 | Switzerland | 76304.0 |
| 4 | Bahamas | 40402.0 |
| 5 | Seychelles | 30510.0 |
| 6 | Jersey | 29128.0 |
| 7 | Luxembourg | 21730.0 |
| 8 | United Kingdom | 20584.0 |
| 9 | Niue | 19422.0 |
# Always run the cell above first: this plots df_strength in the sorted,
# re-indexed order built there, so the x axis is each country's rank.
df_strength['weighted_degree'].plot()
plt.title('Weighted Degree Distribution (Only Jurisdiction Relationship)')
plt.xlabel('Countries')
plt.ylabel('Weighted Degree')
Text(0, 0.5, 'Weighted Degree')
# Weighted in-degree (strength over incoming edges) of every vertex of g_j,
# ranked from highest to lowest; the ten highest are displayed.
df_indegree = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'weighted_indegree': g_j.strength(weights='weight', mode='in'),
    })
    .sort_values('weighted_indegree', ascending=False)
    .reset_index(drop=True)
)
df_indegree.head(10)
| name | weighted_indegree | |
|---|---|---|
| 0 | British Virgin Islands | 229716.0 |
| 1 | Panama | 84486.0 |
| 2 | Bahamas | 31064.0 |
| 3 | Seychelles | 29840.0 |
| 4 | Niue | 18912.0 |
| 5 | Samoa | 12474.0 |
| 6 | British Anguilla | 6480.0 |
| 7 | Barbados | 4722.0 |
| 8 | Bermuda | 4662.0 |
| 9 | Cayman Islands | 2868.0 |
# Always run the cell above first: this plots df_indegree in the sorted,
# re-indexed order built there, so the x axis is each country's rank.
df_indegree['weighted_indegree'].plot()
plt.title('Weighted In Degree Distribution (Only Jurisdiction Relationship)')
plt.xlabel('Countries')
plt.ylabel('Weighted In Degree')
Text(0, 0.5, 'Weighted In Degree')
# Weighted out-degree (strength over outgoing edges) of every vertex of g_j,
# ranked from highest to lowest; the ten highest are displayed.
df_outdegree = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'weighted_outdegree': g_j.strength(weights='weight', mode='out'),
    })
    .sort_values('weighted_outdegree', ascending=False)
    .reset_index(drop=True)
)
df_outdegree.head(10)
| name | weighted_outdegree | |
|---|---|---|
| 0 | Hong Kong | 77914.0 |
| 1 | Switzerland | 76304.0 |
| 2 | Jersey | 29056.0 |
| 3 | Luxembourg | 21728.0 |
| 4 | United Kingdom | 20390.0 |
| 5 | Panama | 19528.0 |
| 6 | Guernsey | 14800.0 |
| 7 | United Arab Emirates | 14608.0 |
| 8 | Isle of Man | 9988.0 |
| 9 | United States | 9792.0 |
# Always run the cell above first: this plots df_outdegree in the sorted,
# re-indexed order built there, so the x axis is each country's rank.
df_outdegree['weighted_outdegree'].plot()
plt.title('Weighted Out Degree Distribution (Only Jurisdiction Relationship)')
plt.xlabel('Countries')
plt.ylabel('Weighted Out Degree')
Text(0, 0.5, 'Weighted Out Degree')
# Weighted PageRank of every vertex of g_j, ranked from highest to lowest;
# the ten highest are displayed.
df_pagerank = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'pagerank': g_j.pagerank(weights='weight'),
    })
    .sort_values('pagerank', ascending=False)
    .reset_index(drop=True)
)
df_pagerank.head(10)
| name | pagerank | |
|---|---|---|
| 0 | British Virgin Islands | 0.241222 |
| 1 | Barbados | 0.173126 |
| 2 | Panama | 0.166738 |
| 3 | Bahamas | 0.063767 |
| 4 | Seychelles | 0.040682 |
| 5 | Bermuda | 0.034434 |
| 6 | Niue | 0.030858 |
| 7 | British Anguilla | 0.022747 |
| 8 | Cayman Islands | 0.019978 |
| 9 | Samoa | 0.016894 |
# Always run the cell above first: this plots df_pagerank in the sorted,
# re-indexed order built there, so the x axis is each country's rank.
df_pagerank['pagerank'].plot()
plt.title('Pagerank Distribution (Only Jurisdiction Relationship)')
plt.xlabel('Countries')
plt.ylabel('Pagerank')
Text(0, 0.5, 'Pagerank')
# Weighted authority score of every vertex of g_j, ranked from highest to
# lowest; the ten highest are displayed.
df_authorities = (
    pd.DataFrame({
        'name': list(g_j.vs['name']),
        'authority_score': g_j.authority_score(weights='weight'),
    })
    .sort_values('authority_score', ascending=False)
    .reset_index(drop=True)
)
df_authorities.head(10)
| name | authority_score | |
|---|---|---|
| 0 | British Virgin Islands | 1.000000 |
| 1 | Panama | 0.243992 |
| 2 | Seychelles | 0.112577 |
| 3 | Bahamas | 0.082955 |
| 4 | Samoa | 0.068108 |
| 5 | Niue | 0.066430 |
| 6 | British Anguilla | 0.029362 |
| 7 | Bermuda | 0.011364 |
| 8 | Cayman Islands | 0.006434 |
| 9 | Nevada | 0.004324 |
# Always run the cell above first: this plots df_authorities in the sorted,
# re-indexed order built there, so the x axis is each country's rank.
df_authorities['authority_score'].plot()
plt.title('Authority Score Distribution (Only Jurisdiction Relationship)')
plt.xlabel('Countries')
plt.ylabel('Authority Score')
Text(0, 0.5, 'Authority Score')
# Label-propagation community detection on g_j, using the edge 'weight' attribute.
comm_lp_j= g_j.community_label_propagation(weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_lp_j, mark_groups = True, layout="drl")
# Weighted modularity of the label-propagation partition of g_j.
modularity = g_j.modularity(comm_lp_j, weights='weight')
print(modularity)
0.017876015599913773
# Per-community summary statistics for the label-propagation partition of g_j.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_lp_j))
average weighted degree: [0.0, 23828.470588235294, 30.0, 467.45454545454544, 147.5, 0.0, 0.0, 18.0, 0.0, 0.0, 0.0, 230.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 14.0, 1.0, 6.0, 4.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.8377581120943953, nan, 0.33994334277620397, 0.5142857142857142, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 3, 1, 4, 3, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Partition g_j with the leading-eigenvector method, using the edge 'weight' attribute.
# NOTE(review): g_j is built with directed=True and igraph warns that this
# method was developed for undirected graphs, so edge direction is presumably
# ignored — confirm that is acceptable.
comm_leading_eigenvector_j= g_j.community_leading_eigenvector(weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_leading_eigenvector_j, mark_groups = True, layout="drl")
/Users/susankoruthu/opt/anaconda3/lib/python3.8/site-packages/igraph/__init__.py:1281: RuntimeWarning: This method was developed for undirected graphs at src/community/leading_eigenvector.c:530 membership, _, q = GraphBase.community_leading_eigenvector(
# Weighted modularity of the leading-eigenvector partition of g_j.
modularity = g_j.modularity(comm_leading_eigenvector_j, weights='weight')
print(modularity)
0.13928311330179288
# Per-community summary statistics for the leading-eigenvector partition of g_j.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_leading_eigenvector_j))
average weighted degree: [1792.4313725490197, 5410.888888888889] average degree: [5.2745098039215685, 9.61111111111111] transitivity: [0.1493288590604027, 0.33534222987516143] diameter: [3, 4]
# One row per vertex of g_j: its name and the id of the community it was assigned to.
df_leading_eigenvector_j= pd.DataFrame({'name': list(g_j.vs['name']), 'community':comm_leading_eigenvector_j.membership } )
df_leading_eigenvector_j
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Andorra | 0 |
| 3 | British Virgin Islands | 1 |
| 4 | Cayman Islands | 1 |
| ... | ... | ... |
| 169 | Venezuela | 0 |
| 170 | Vietnam | 1 |
| 171 | Yemen | 0 |
| 172 | Zambia | 1 |
| 173 | Zimbabwe | 0 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_leading_eigenvector_j.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 0.413793 |
| std | 0.493934 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 0.000000 |
| 75% | 1.000000 |
| max | 1.000000 |
## Plot the leading-eigenvector communities of g_j.
# NOTE(review): every output path below is identical to the one used by the
# g_aj leading-eigenvector plots, so running this cell overwrites those
# images — confirm whether a separate directory was intended for g_j.
# One colour per vertex, looked up in the notebook-level `colors` palette by
# the id of the community the vertex belongs to.
vertex_colors = [
    colors[community]
    for community in comm_leading_eigenvector_j.membership
]

# Styling shared by both whole-graph pictures.
visual_style = {
    "bbox": (5000, 5000),
    "margin": 17,
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the edge weight (plus 1).
    "edge_width": np.log(g_j.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Straight edges
    "edge_curved": False,
}

# Whole graph, Fruchterman-Reingold layout.
my_layout = g_j.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g_j, 'directed_all_jur_images/communities_leading_eigenvector.png', vertex_label=g_j.vs['name'], **visual_style)

# Whole graph again, circular layout.
my_layout = g_j.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g_j, 'directed_all_jur_images/leading_vector_circular.png', vertex_label=g_j.vs['name'], **visual_style)

# One picture per community. These three settings are the same for every
# subgraph, so they are set once before the loop.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is evaluated a single time here; the previous version called it
# for the loop bound and again for every attribute access inside the loop.
for i, subgraph in enumerate(comm_leading_eigenvector_j.subgraphs()):
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(subgraph, 'directed_all_jur_images/leading_eigenvector_subgraph' + str(i) + '.png', vertex_label=subgraph.vs['name'], **visual_style)
# Partition g_j with the spinglass method, using the edge 'weight' attribute.
# NOTE(review): spinglass is presumably stochastic (simulated annealing), so
# the partition may differ between runs unless the RNG is seeded — confirm.
comm_spinglass_j = g_j.community_spinglass(weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_spinglass_j, mark_groups = True, layout="drl")
# Weighted modularity of the spinglass partition of g_j.
modularity = g_j.modularity(comm_spinglass_j, weights='weight')
print(modularity)
0.10736202089136093
# Per-community summary statistics for the spinglass partition of g_j.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_spinglass_j))
average weighted degree: [1556.6, 125.0, 145.6, 5716.279069767442, 519.0, 1353.3617021276596] average degree: [5.4, 1.5, 2.1, 3.953488372093023, 2.4, 3.1914893617021276] transitivity: [0.29261862917398945, 0.0, 0.0, 0.1848101265822785, 0.07142857142857142, 0.08368554522400676] diameter: [4, 2, 1, 6, 3, 2]
# One row per vertex of g_j: its name and the id of its spinglass community.
df_spinglass_j = pd.DataFrame({'name': list(g_j.vs['name']), 'community':comm_spinglass_j.membership } )
df_spinglass_j
| name | community | |
|---|---|---|
| 0 | Albania | 4 |
| 1 | Bahamas | 4 |
| 2 | Andorra | 5 |
| 3 | British Virgin Islands | 3 |
| 4 | Cayman Islands | 0 |
| ... | ... | ... |
| 169 | Venezuela | 5 |
| 170 | Vietnam | 0 |
| 171 | Yemen | 5 |
| 172 | Zambia | 3 |
| 173 | Zimbabwe | 5 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_spinglass_j.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 2.804598 |
| std | 1.858205 |
| min | 0.000000 |
| 25% | 1.250000 |
| 50% | 3.000000 |
| 75% | 5.000000 |
| max | 5.000000 |
## Plot the spinglass communities of g_j.
# NOTE(review): every output path below is identical to the one used by the
# g_aj spinglass plots, so running this cell overwrites those images —
# confirm whether a separate directory was intended for g_j.
# One colour per vertex, looked up in the notebook-level `colors` palette by
# the id of the community the vertex belongs to.
vertex_colors = [
    colors[community]
    for community in comm_spinglass_j.membership
]

# Styling shared by both whole-graph pictures.
visual_style = {
    "bbox": (5000, 5000),
    "margin": 17,
    "vertex_color": vertex_colors,
    # Edge width grows with the log of the edge weight (plus 1).
    "edge_width": np.log(g_j.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Straight edges
    "edge_curved": False,
}

# Whole graph, Fruchterman-Reingold layout.
my_layout = g_j.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(g_j, 'directed_all_jur_images/communities_spinglass.png', vertex_label=g_j.vs['name'], **visual_style)

# Whole graph again, circular layout.
my_layout = g_j.layout_circle()
visual_style["layout"] = my_layout
ig.plot(g_j, 'directed_all_jur_images/spinglass_circular.png', vertex_label=g_j.vs['name'], **visual_style)

# One picture per community. These three settings are the same for every
# subgraph, so they are set once before the loop.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
# subgraphs() is evaluated a single time here; the previous version called it
# for the loop bound and again for every attribute access inside the loop.
for i, subgraph in enumerate(comm_spinglass_j.subgraphs()):
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(subgraph, 'directed_all_jur_images/spinglass_subgraph' + str(i) + '.png', vertex_label=subgraph.vs['name'], **visual_style)
# Walktrap community detection on g_j with weighted random walks of length 2.
walktrap_j = g_j.community_walktrap(weights = 'weight', steps = 2)
# Turn the walktrap result into a flat clustering; no cluster count is passed,
# so the cut level is whatever as_clustering() picks by default.
comm_walktrap_j = walktrap_j.as_clustering()
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_walktrap_j, mark_groups = True, layout="drl")
# Weighted modularity of the walktrap partition of g_j.
modularity = g_j.modularity(comm_walktrap_j, weights='weight')
print(modularity)
0.003887288169812182
# Per-community summary statistics for the walktrap partition of g_j.
# Each get_* helper (defined elsewhere in the notebook) is called with the
# clustering and its result is printed after the matching label.
for label, metric in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(label, metric(comm_walktrap_j))
average weighted degree: [0.0, 6544.272727272727, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 844.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 17.439393939393938, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.27605592593366635, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# One row per vertex of g_j: its name and the id of its walktrap community.
df_walktrap_j = pd.DataFrame({'name': list(g_j.vs['name']), 'community':comm_walktrap_j.membership } )
df_walktrap_j
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 1 |
| 2 | Andorra | 1 |
| 3 | British Virgin Islands | 1 |
| 4 | Cayman Islands | 1 |
| ... | ... | ... |
| 169 | Venezuela | 1 |
| 170 | Vietnam | 1 |
| 171 | Yemen | 40 |
| 172 | Zambia | 41 |
| 173 | Zimbabwe | 1 |
174 rows × 2 columns
# Summary of the community-id column. The ids are labels rather than
# quantities, so min/max (the range of ids in use) are the informative rows.
df_walktrap_j.describe()
| community | |
|---|---|
| count | 174.000000 |
| mean | 5.764368 |
| std | 10.282523 |
| min | 0.000000 |
| 25% | 1.000000 |
| 50% | 1.000000 |
| 75% | 1.000000 |
| max | 41.000000 |
# ##plot community
# visual_style = {}
# vertex_colors = []
# for i in range(len(comm_walktrap_j.membership)):
# vertex_colors.append(colors[comm_walktrap_j.membership[i]])
# # Set bbox and margin
# visual_style["bbox"] = (5000,5000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = vertex_colors
# # Set edge width
# visual_style["edge_width"] = np.log(g_j.es['weight']) + 1
# # Set edge color
# visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# # Set vertex size
# visual_style["vertex_size"] = 15
# # Set vertex label size
# visual_style["vertex_label_size"] = 20
# # Set vertex label color
# visual_style["vertex_label_color"] = 'red'
# # Don't curve the edges
# visual_style["edge_curved"] = False
# # Set the layout
# my_layout = g_j.layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_j,'directed_all_jur_images/communities_walktrap.png', vertex_label = g_j.vs['name'] , **visual_style)
# # Set the layout
# my_layout = g_j.layout_circle()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_j,'directed_all_jur_images/walktrap_circular.png', vertex_label = g_j.vs['name'] , **visual_style)
# for i in range(len(comm_walktrap_j.subgraphs())):
# visual_style["bbox"] = (2000,2000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = 'green'
# # Set edge width
# visual_style["edge_width"] = np.log(comm_walktrap_j.subgraphs()[i].es['weight']) + 1
# # Set the layout
# my_layout = comm_walktrap_j.subgraphs()[i].layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# ig.plot(comm_walktrap_j.subgraphs()[i],'directed_all_jur_images/walktrap_subgraph' + str(i) + '.png' ,vertex_label = comm_walktrap_j.subgraphs()[i].vs['name'],**visual_style )
# Infomap community detection on g_j, weighting edges by the 'weight' attribute.
comm_infomap_j = g_j.community_infomap(edge_weights = 'weight')
# Draw the clustering with every community marked, on a DrL layout.
ig.plot(comm_infomap_j, mark_groups = True, layout="drl")
# Weighted modularity of the infomap partition of g_j. In the recorded run
# every vertex ended up in community 0, which is why the value printed is 0.0.
modularity = g_j.modularity(comm_infomap_j, weights='weight')
print(modularity)
0.0
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_infomap_j))
average weighted degree: [4978.367816091954] average degree: [13.988505747126437] transitivity: [0.24672505151604357] diameter: [5]
# Vertex name -> Infomap community id, one row per vertex of g_j.
df_infomap_j = pd.DataFrame(
    {
        'name': list(g_j.vs['name']),
        'community': comm_infomap_j.membership,
    }
)
df_infomap_j
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Andorra | 0 |
| 3 | British Virgin Islands | 0 |
| 4 | Cayman Islands | 0 |
| ... | ... | ... |
| 169 | Venezuela | 0 |
| 170 | Vietnam | 0 |
| 171 | Yemen | 0 |
| 172 | Zambia | 0 |
| 173 | Zimbabwe | 0 |
174 rows × 2 columns
df_weighted['status'].unique()
array(['jurisdiction', 'intermediary_base', 'intermediary_jurisdiction',
'officer_base', 'officer_jurisdiction'], dtype=object)
# Keep only the officer -> jurisdiction relationships, then drop the status column.
jurisdiction_list = ['officer_jurisdiction']
status_mask = df_weighted['status'].isin(jurisdiction_list)
df_weighted_jur = df_weighted[status_mask]
df_jur = df_weighted_jur.drop(columns=['status'])
df_jur
| node_start | node_end | weight | |
|---|---|---|---|
| 221026 | Jersey | British Virgin Islands | 0.5 |
| 221027 | China | British Virgin Islands | 0.5 |
| 221028 | China | British Virgin Islands | 0.5 |
| 221029 | United States | British Virgin Islands | 0.5 |
| 221030 | Indonesia | British Virgin Islands | 0.5 |
| ... | ... | ... | ... |
| 221136 | Indonesia | Samoa | 0.5 |
| 221137 | Cayman Islands | Samoa | 0.5 |
| 221138 | British Virgin Islands | Samoa | 0.5 |
| 221139 | China | Samoa | 0.5 |
| 221140 | China | British Virgin Islands | 0.5 |
115 rows × 3 columns
# Collapse duplicate (node_start, node_end) pairs into one row, summing their weights.
edge_keys = ['node_start', 'node_end']
df_jur_grouped = df_jur.groupby(edge_keys, as_index=False).sum()
df_jur_grouped
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | British Virgin Islands | Cook Islands | 0.5 |
| 1 | British Virgin Islands | Samoa | 0.5 |
| 2 | British Virgin Islands | Seychelles | 0.5 |
| 3 | Canada | British Virgin Islands | 0.5 |
| 4 | Cayman Islands | British Virgin Islands | 0.5 |
| 5 | Cayman Islands | Samoa | 0.5 |
| 6 | China | British Virgin Islands | 7.5 |
| 7 | China | Labuan | 0.5 |
| 8 | China | Samoa | 1.0 |
| 9 | China | Singapore | 0.5 |
| 10 | Czech Republic | British Virgin Islands | 0.5 |
| 11 | Fiji | British Virgin Islands | 0.5 |
| 12 | Hong Kong | British Virgin Islands | 6.0 |
| 13 | India | British Virgin Islands | 2.0 |
| 14 | India | Samoa | 0.5 |
| 15 | Indonesia | British Virgin Islands | 4.5 |
| 16 | Indonesia | Samoa | 0.5 |
| 17 | Italy | Cook Islands | 0.5 |
| 18 | Japan | British Virgin Islands | 1.0 |
| 19 | Jersey | British Virgin Islands | 0.5 |
| 20 | Macao | Cook Islands | 0.5 |
| 21 | Malaysia | British Virgin Islands | 1.0 |
| 22 | Monaco | British Virgin Islands | 0.5 |
| 23 | New Zealand | British Virgin Islands | 0.5 |
| 24 | Philippines | British Virgin Islands | 1.0 |
| 25 | Romania | British Virgin Islands | 0.5 |
| 26 | Samoa | British Virgin Islands | 0.5 |
| 27 | Singapore | British Virgin Islands | 1.5 |
| 28 | Singapore | Hong Kong | 0.5 |
| 29 | Singapore | Samoa | 0.5 |
| 30 | South Korea | British Virgin Islands | 0.5 |
| 31 | Taiwan | British Virgin Islands | 11.0 |
| 32 | Taiwan | Hong Kong | 0.5 |
| 33 | Taiwan | Labuan | 0.5 |
| 34 | Taiwan | Samoa | 0.5 |
| 35 | Thailand | Samoa | 0.5 |
| 36 | US Virgin Islands | British Virgin Islands | 0.5 |
| 37 | US Virgin Islands | Singapore | 0.5 |
| 38 | United Arab Emirates | British Virgin Islands | 1.5 |
| 39 | United Kingdom | British Virgin Islands | 1.0 |
| 40 | United States | British Virgin Islands | 3.5 |
| 41 | United States | Cook Islands | 1.0 |
# df_jur_grouped_list = df_jur_grouped.values.tolist()
# df_jur_grouped_list
# d = collections.defaultdict(int)
# for n1, n2, v in df_jur_grouped_list:
# d[min(n1, n2), max(n1, n2)] += v
# result = [[k[0], k[1], v] for k, v in d.items()]
# df_net = pd.DataFrame(result)
# df_net
# df_net = df_net.rename(columns = {0:'node_1', 1:'node_2', 2: 'weight'})
# df_net
#df_net.to_csv('undirected_weighted_grouped.csv', index = False)
# df_net.describe()
# Build the directed officer-jurisdiction graph: one edge per
# (node_start, node_end) row, with the third column stored as 'weight'.
# edge_attrs takes the attribute names for the extra tuple items; the previous
# dict only worked because iterating a dict yields its keys.
g_oj = ig.Graph.TupleList(
    df_jur_grouped.itertuples(index=False),
    directed=True,
    edge_attrs=['weight'],
)
ig.plot(g_oj)
g_oj.diameter()
3
print( "Clustering Coefficient: ", g_oj.transitivity_undirected())
Clustering Coefficient: 0.09863013698630137
np.mean(g_oj.degree())
2.8
np.mean(g_oj.strength(weights = 'weight'))
3.8333333333333335
# Weighted degree (strength) per vertex of g_oj, ranked from highest to lowest.
df_strength = (
    pd.DataFrame(
        {
            'name': list(g_oj.vs['name']),
            'weighted_degree': g_oj.strength(weights='weight'),
        }
    )
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength.head(10)
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 48.5 |
| 1 | Taiwan | 12.5 |
| 2 | China | 9.5 |
| 3 | Hong Kong | 7.0 |
| 4 | Samoa | 5.0 |
| 5 | Indonesia | 5.0 |
| 6 | United States | 4.5 |
| 7 | Singapore | 3.5 |
| 8 | Cook Islands | 2.5 |
| 9 | India | 2.5 |
# Run the cell above first: df_strength is rebuilt per graph and reused here.
ax_strength = df_strength['weighted_degree'].plot()
ax_strength.set_title('Weighted Degree Distribution (Officer Jurisdiction Relationships)')
ax_strength.set_xlabel('Countries')
ax_strength.set_ylabel('Weighted Degree')
Text(0, 0.5, 'Weighted Degree')
# Weighted in-degree per vertex of g_oj, ranked from highest to lowest.
df_indegree = (
    pd.DataFrame(
        {
            'name': list(g_oj.vs['name']),
            'weighted_indegree': g_oj.strength(weights='weight', mode='in'),
        }
    )
    .sort_values('weighted_indegree', ascending=False)
    .reset_index(drop=True)
)
df_indegree.head(10)
| name | weighted_indegree | |
|---|---|---|
| 0 | British Virgin Islands | 47.0 |
| 1 | Samoa | 4.5 |
| 2 | Cook Islands | 2.5 |
| 3 | Hong Kong | 1.0 |
| 4 | Labuan | 1.0 |
| 5 | Singapore | 1.0 |
| 6 | Seychelles | 0.5 |
| 7 | New Zealand | 0.0 |
| 8 | Philippines | 0.0 |
| 9 | Romania | 0.0 |
# Run the cell above first: df_indegree is rebuilt per graph and reused here.
ax_indegree = df_indegree['weighted_indegree'].plot()
ax_indegree.set_title('Weighted In Degree Distribution (Officer Jurisdiction Relationships)')
ax_indegree.set_xlabel('Countries')
ax_indegree.set_ylabel('Weighted In Degree')
Text(0, 0.5, 'Weighted In Degree')
# Weighted out-degree per vertex of g_oj, ranked from highest to lowest.
df_outdegree = (
    pd.DataFrame(
        {
            'name': list(g_oj.vs['name']),
            'weighted_outdegree': g_oj.strength(weights='weight', mode='out'),
        }
    )
    .sort_values('weighted_outdegree', ascending=False)
    .reset_index(drop=True)
)
df_outdegree.head(10)
| name | weighted_outdegree | |
|---|---|---|
| 0 | Taiwan | 12.5 |
| 1 | China | 9.5 |
| 2 | Hong Kong | 6.0 |
| 3 | Indonesia | 5.0 |
| 4 | United States | 4.5 |
| 5 | Singapore | 2.5 |
| 6 | India | 2.5 |
| 7 | United Arab Emirates | 1.5 |
| 8 | British Virgin Islands | 1.5 |
| 9 | Japan | 1.0 |
# Run the cell above first: df_outdegree is rebuilt per graph and reused here.
df_outdegree['weighted_outdegree'].plot()
# df_outdegree was computed from g_oj (officer-jurisdiction graph), so the
# title must name that graph; it previously read "All Jurisdiction Relationships".
plt.title('Weighted Out Degree Distribution (Officer Jurisdiction Relationships)')
plt.xlabel('Countries')
plt.ylabel('Weighted Out Degree')
Text(0, 0.5, 'Weighted Out Degree')
# Weighted PageRank per vertex of g_oj, ranked from highest to lowest.
df_pagerank = (
    pd.DataFrame(
        {
            'name': list(g_oj.vs['name']),
            'pagerank': g_oj.pagerank(weights='weight'),
        }
    )
    .sort_values('pagerank', ascending=False)
    .reset_index(drop=True)
)
df_pagerank.head(10)
| name | pagerank | |
|---|---|---|
| 0 | British Virgin Islands | 0.324553 |
| 1 | Samoa | 0.126458 |
| 2 | Cook Islands | 0.126287 |
| 3 | Seychelles | 0.103840 |
| 4 | Singapore | 0.017466 |
| 5 | Hong Kong | 0.015257 |
| 6 | Labuan | 0.012819 |
| 7 | New Zealand | 0.011883 |
| 8 | Philippines | 0.011883 |
| 9 | Romania | 0.011883 |
# Run the cell above first: df_pagerank is rebuilt per graph and reused here.
ax_pagerank = df_pagerank['pagerank'].plot()
ax_pagerank.set_title('Pagerank Distribution (Officer Jurisdiction Relationships)')
ax_pagerank.set_xlabel('Countries')
ax_pagerank.set_ylabel('Pagerank')
Text(0, 0.5, 'Pagerank')
# Weighted authority score per vertex of g_oj, ranked from highest to lowest.
df_authorities = (
    pd.DataFrame(
        {
            'name': list(g_oj.vs['name']),
            'authority_score': g_oj.authority_score(weights='weight'),
        }
    )
    .sort_values('authority_score', ascending=False)
    .reset_index(drop=True)
)
df_authorities.head(10)
| name | authority_score | |
|---|---|---|
| 0 | British Virgin Islands | 1.000000 |
| 1 | Samoa | 0.066541 |
| 2 | Labuan | 0.035502 |
| 3 | Hong Kong | 0.023994 |
| 4 | Singapore | 0.015414 |
| 5 | Cook Islands | 0.013475 |
| 6 | Seychelles | 0.000076 |
| 7 | New Zealand | 0.000000 |
| 8 | Philippines | 0.000000 |
| 9 | Romania | 0.000000 |
# Run the cell above first: df_authorities is rebuilt per graph and reused here.
ax_authority = df_authorities['authority_score'].plot()
ax_authority.set_title('Authority Score Distribution (Officer Jurisdiction Relationships)')
ax_authority.set_xlabel('Countries')
ax_authority.set_ylabel('Authority Score')
Text(0, 0.5, 'Authority Score')
# Label-propagation community detection on g_oj, using the 'weight' edge attribute.
comm_lp_oj = g_oj.community_label_propagation(weights='weight')
ig.plot(comm_lp_oj, mark_groups=True, layout="drl")
# Weighted modularity of the resulting partition.
modularity = g_oj.modularity(comm_lp_oj, weights='weight')
print(modularity)
0.03795841209829868
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_lp_oj))
average weighted degree: [7.8, 1.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [2.8, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [0.5454545454545454, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [2, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Leading-eigenvector community detection on g_oj, using the 'weight' edge attribute.
# NOTE(review): igraph warns that this method was developed for undirected
# graphs, while g_oj is built with directed=True - confirm that this is acceptable.
comm_leading_eigenvector_oj = g_oj.community_leading_eigenvector(weights='weight')
ig.plot(comm_leading_eigenvector_oj, mark_groups=True, layout="drl")
/Users/susankoruthu/opt/anaconda3/lib/python3.8/site-packages/igraph/__init__.py:1281: RuntimeWarning: This method was developed for undirected graphs at src/community/leading_eigenvector.c:530 membership, _, q = GraphBase.community_leading_eigenvector(
# Weighted modularity of the leading-eigenvector partition of g_oj.
modularity = g_oj.modularity(
    comm_leading_eigenvector_oj,
    weights='weight',
)
print(modularity)
0.10638941398865785
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_leading_eigenvector_oj))
average weighted degree: [4.05, 1.0, 0.8333333333333334] average degree: [2.1, 1.5, 1.6666666666666667] transitivity: [0.018867924528301886, 0.0, 0.0] diameter: [2, 1, 2]
# Vertex name -> leading-eigenvector community id, one row per vertex of g_oj.
df_leading_eigenvector_oj = pd.DataFrame(
    {
        'name': list(g_oj.vs['name']),
        'community': comm_leading_eigenvector_oj.membership,
    }
)
df_leading_eigenvector_oj
| name | community | |
|---|---|---|
| 0 | British Virgin Islands | 0 |
| 1 | Cook Islands | 1 |
| 2 | Samoa | 2 |
| 3 | Seychelles | 0 |
| 4 | Canada | 0 |
| 5 | Cayman Islands | 2 |
| 6 | China | 0 |
| 7 | Labuan | 0 |
| 8 | Singapore | 2 |
| 9 | Czech Republic | 0 |
| 10 | Fiji | 0 |
| 11 | Hong Kong | 0 |
| 12 | India | 2 |
| 13 | Indonesia | 0 |
| 14 | Italy | 1 |
| 15 | Japan | 0 |
| 16 | Jersey | 0 |
| 17 | Macao | 1 |
| 18 | Malaysia | 0 |
| 19 | Monaco | 0 |
| 20 | New Zealand | 0 |
| 21 | Philippines | 0 |
| 22 | Romania | 0 |
| 23 | South Korea | 0 |
| 24 | Taiwan | 0 |
| 25 | Thailand | 2 |
| 26 | US Virgin Islands | 2 |
| 27 | United Arab Emirates | 0 |
| 28 | United Kingdom | 0 |
| 29 | United States | 1 |
df_leading_eigenvector_oj.describe()
| community | |
|---|---|
| count | 30.000000 |
| mean | 0.533333 |
| std | 0.819307 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 0.000000 |
| 75% | 1.000000 |
| max | 2.000000 |
## Plot the leading-eigenvector communities of g_oj.
# NOTE(review): the images are written to 'directed_all_jur_images/', whose
# name suggests a different (all-jurisdiction) graph; if another section
# writes the same file names they will be overwritten - confirm the directory.

# One colour per vertex, looked up by its community id (colors is defined
# elsewhere in the notebook).
vertex_colors = [
    colors[community_id]
    for community_id in comm_leading_eigenvector_oj.membership
]

visual_style = {}
# Set bbox and margin
visual_style["bbox"] = (5000, 5000)
visual_style["margin"] = 17
# Set vertex colours
visual_style["vertex_color"] = vertex_colors
# Set edge width (log scale keeps heavy edges from dominating the picture)
visual_style["edge_width"] = np.log(g_oj.es['weight']) + 1
# Set edge color
visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# Set vertex size
visual_style["vertex_size"] = 15
# Set vertex label size
visual_style["vertex_label_size"] = 20
# Set vertex label color
visual_style["vertex_label_color"] = 'red'
# Don't curve the edges
visual_style["edge_curved"] = False

# Whole graph, force-directed layout
my_layout = g_oj.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(
    g_oj,
    'directed_all_jur_images/communities_leading_eigenvector.png',
    vertex_label=g_oj.vs['name'],
    **visual_style,
)

# Whole graph, circular layout
my_layout = g_oj.layout_circle()
visual_style["layout"] = my_layout
ig.plot(
    g_oj,
    'directed_all_jur_images/leading_vector_circular.png',
    vertex_label=g_oj.vs['name'],
    **visual_style,
)

# One image per community. subgraphs() is called once up front: every call
# rebuilds all community subgraphs, so calling it inside the loop repeated
# that work several times per iteration.
community_subgraphs = comm_leading_eigenvector_oj.subgraphs()
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
for i, subgraph in enumerate(community_subgraphs):
    # Edge widths and layout are specific to each subgraph
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(
        subgraph,
        'directed_all_jur_images/leading_eigenvector_subgraph' + str(i) + '.png',
        vertex_label=subgraph.vs['name'],
        **visual_style,
    )
# Spinglass community detection on g_oj, using the 'weight' edge attribute.
comm_spinglass_oj = g_oj.community_spinglass(weights='weight')
ig.plot(comm_spinglass_oj, mark_groups=True, layout="drl")
# Weighted modularity of the resulting partition.
modularity = g_oj.modularity(comm_spinglass_oj, weights='weight')
print(modularity)
0.10381852551984883
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_spinglass_oj))
average weighted degree: [0.5, 1.0, 0.75, 4.05] average degree: [1.0, 1.5, 1.5, 2.1] transitivity: [nan, 0.0, 0.0, 0.018867924528301886] diameter: [1, 1, 1, 2]
# Vertex name -> spinglass community id, one row per vertex of g_oj.
df_spinglass_oj = pd.DataFrame(
    {
        'name': list(g_oj.vs['name']),
        'community': comm_spinglass_oj.membership,
    }
)
df_spinglass_oj
| name | community | |
|---|---|---|
| 0 | British Virgin Islands | 3 |
| 1 | Cook Islands | 1 |
| 2 | Samoa | 2 |
| 3 | Seychelles | 3 |
| 4 | Canada | 3 |
| 5 | Cayman Islands | 2 |
| 6 | China | 3 |
| 7 | Labuan | 3 |
| 8 | Singapore | 0 |
| 9 | Czech Republic | 3 |
| 10 | Fiji | 3 |
| 11 | Hong Kong | 3 |
| 12 | India | 2 |
| 13 | Indonesia | 3 |
| 14 | Italy | 1 |
| 15 | Japan | 3 |
| 16 | Jersey | 3 |
| 17 | Macao | 1 |
| 18 | Malaysia | 3 |
| 19 | Monaco | 3 |
| 20 | New Zealand | 3 |
| 21 | Philippines | 3 |
| 22 | Romania | 3 |
| 23 | South Korea | 3 |
| 24 | Taiwan | 3 |
| 25 | Thailand | 2 |
| 26 | US Virgin Islands | 0 |
| 27 | United Arab Emirates | 3 |
| 28 | United Kingdom | 3 |
| 29 | United States | 1 |
df_spinglass_oj.describe()
| community | |
|---|---|
| count | 30.000000 |
| mean | 2.400000 |
| std | 0.968468 |
| min | 0.000000 |
| 25% | 2.000000 |
| 50% | 3.000000 |
| 75% | 3.000000 |
| max | 3.000000 |
## Plot the spinglass communities of g_oj.
# NOTE(review): the images are written to 'directed_all_jur_images/', whose
# name suggests a different (all-jurisdiction) graph; if another section
# writes the same file names they will be overwritten - confirm the directory.

# One colour per vertex, looked up by its community id (colors is defined
# elsewhere in the notebook).
vertex_colors = [
    colors[community_id]
    for community_id in comm_spinglass_oj.membership
]

visual_style = {}
# Set bbox and margin
visual_style["bbox"] = (5000, 5000)
visual_style["margin"] = 17
# Set vertex colours
visual_style["vertex_color"] = vertex_colors
# Set edge width (log scale keeps heavy edges from dominating the picture)
visual_style["edge_width"] = np.log(g_oj.es['weight']) + 1
# Set edge color
visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# Set vertex size
visual_style["vertex_size"] = 15
# Set vertex label size
visual_style["vertex_label_size"] = 20
# Set vertex label color
visual_style["vertex_label_color"] = 'red'
# Don't curve the edges
visual_style["edge_curved"] = False

# Whole graph, force-directed layout
my_layout = g_oj.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(
    g_oj,
    'directed_all_jur_images/communities_spinglass.png',
    vertex_label=g_oj.vs['name'],
    **visual_style,
)

# Whole graph, circular layout
my_layout = g_oj.layout_circle()
visual_style["layout"] = my_layout
ig.plot(
    g_oj,
    'directed_all_jur_images/spinglass_circular.png',
    vertex_label=g_oj.vs['name'],
    **visual_style,
)

# One image per community. subgraphs() is called once up front: every call
# rebuilds all community subgraphs, so calling it inside the loop repeated
# that work several times per iteration.
community_subgraphs = comm_spinglass_oj.subgraphs()
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'
for i, subgraph in enumerate(community_subgraphs):
    # Edge widths and layout are specific to each subgraph
    visual_style["edge_width"] = np.log(subgraph.es['weight']) + 1
    my_layout = subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    ig.plot(
        subgraph,
        'directed_all_jur_images/spinglass_subgraph' + str(i) + '.png',
        vertex_label=subgraph.vs['name'],
        **visual_style,
    )
# Walktrap community detection on g_oj with 2-step random walks; the result is
# converted to a flat clustering with as_clustering().
walktrap_oj = g_oj.community_walktrap(weights='weight', steps=2)
comm_walktrap_oj = walktrap_oj.as_clustering()
ig.plot(comm_walktrap_oj, mark_groups=True, layout="drl")
# Weighted modularity of the resulting partition.
modularity = g_oj.modularity(comm_walktrap_oj, weights='weight')
print(modularity)
0.03327032136105858
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_walktrap_oj))
average weighted degree: [4.074074074074074, 0.6666666666666666] average degree: [2.814814814814815, 1.3333333333333333] transitivity: [0.09880239520958084, 0.0] diameter: [3, 1]
# Vertex name -> walktrap community id, one row per vertex of g_oj.
df_walktrap_oj = pd.DataFrame(
    {
        'name': list(g_oj.vs['name']),
        'community': comm_walktrap_oj.membership,
    }
)
df_walktrap_oj
| name | community | |
|---|---|---|
| 0 | British Virgin Islands | 0 |
| 1 | Cook Islands | 1 |
| 2 | Samoa | 0 |
| 3 | Seychelles | 0 |
| 4 | Canada | 0 |
| 5 | Cayman Islands | 0 |
| 6 | China | 0 |
| 7 | Labuan | 0 |
| 8 | Singapore | 0 |
| 9 | Czech Republic | 0 |
| 10 | Fiji | 0 |
| 11 | Hong Kong | 0 |
| 12 | India | 0 |
| 13 | Indonesia | 0 |
| 14 | Italy | 1 |
| 15 | Japan | 0 |
| 16 | Jersey | 0 |
| 17 | Macao | 1 |
| 18 | Malaysia | 0 |
| 19 | Monaco | 0 |
| 20 | New Zealand | 0 |
| 21 | Philippines | 0 |
| 22 | Romania | 0 |
| 23 | South Korea | 0 |
| 24 | Taiwan | 0 |
| 25 | Thailand | 0 |
| 26 | US Virgin Islands | 0 |
| 27 | United Arab Emirates | 0 |
| 28 | United Kingdom | 0 |
| 29 | United States | 0 |
df_walktrap_oj.describe()
| community | |
|---|---|
| count | 30.000000 |
| mean | 0.100000 |
| std | 0.305129 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 0.000000 |
| 75% | 0.000000 |
| max | 1.000000 |
# ##plot community
# visual_style = {}
# vertex_colors = []
# for i in range(len(comm_walktrap_oj.membership)):
# vertex_colors.append(colors[comm_walktrap_oj.membership[i]])
# # Set bbox and margin
# visual_style["bbox"] = (5000,5000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = vertex_colors
# # Set edge width
# visual_style["edge_width"] = np.log(g_oj.es['weight']) + 1
# # Set edge color
# visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# # Set vertex size
# visual_style["vertex_size"] = 15
# # Set vertex label size
# visual_style["vertex_label_size"] = 20
# # Set vertex label color
# visual_style["vertex_label_color"] = 'red'
# # Don't curve the edges
# visual_style["edge_curved"] = False
# # Set the layout
# my_layout = g_oj.layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_oj,'directed_all_jur_images/communities_walktrap.png', vertex_label = g_oj.vs['name'] , **visual_style)
# # Set the layout
# my_layout = g_oj.layout_circle()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_oj,'directed_all_jur_images/walktrap_circular.png', vertex_label = g_oj.vs['name'] , **visual_style)
# for i in range(len(comm_walktrap_oj.subgraphs())):
# visual_style["bbox"] = (2000,2000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = 'green'
# # Set edge width
# visual_style["edge_width"] = np.log(comm_walktrap_oj.subgraphs()[i].es['weight']) + 1
# # Set the layout
# my_layout = comm_walktrap_oj.subgraphs()[i].layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# ig.plot(comm_walktrap_oj.subgraphs()[i],'directed_all_jur_images/walktrap_subgraph' + str(i) + '.png' ,vertex_label = comm_walktrap_oj.subgraphs()[i].vs['name'],**visual_style )
# Infomap community detection on g_oj, using the 'weight' edge attribute.
comm_infomap_oj = g_oj.community_infomap(edge_weights='weight')
ig.plot(comm_infomap_oj, mark_groups=True, layout="drl")
# Weighted modularity of the resulting partition.
modularity = g_oj.modularity(comm_infomap_oj, weights='weight')
print(modularity)
0.0
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_infomap_oj))
average weighted degree: [3.8333333333333335] average degree: [2.8] transitivity: [0.09863013698630137] diameter: [3]
# Vertex name -> Infomap community id, one row per vertex of g_oj.
df_infomap_oj = pd.DataFrame(
    {
        'name': list(g_oj.vs['name']),
        'community': comm_infomap_oj.membership,
    }
)
df_infomap_oj
| name | community | |
|---|---|---|
| 0 | British Virgin Islands | 0 |
| 1 | Cook Islands | 0 |
| 2 | Samoa | 0 |
| 3 | Seychelles | 0 |
| 4 | Canada | 0 |
| 5 | Cayman Islands | 0 |
| 6 | China | 0 |
| 7 | Labuan | 0 |
| 8 | Singapore | 0 |
| 9 | Czech Republic | 0 |
| 10 | Fiji | 0 |
| 11 | Hong Kong | 0 |
| 12 | India | 0 |
| 13 | Indonesia | 0 |
| 14 | Italy | 0 |
| 15 | Japan | 0 |
| 16 | Jersey | 0 |
| 17 | Macao | 0 |
| 18 | Malaysia | 0 |
| 19 | Monaco | 0 |
| 20 | New Zealand | 0 |
| 21 | Philippines | 0 |
| 22 | Romania | 0 |
| 23 | South Korea | 0 |
| 24 | Taiwan | 0 |
| 25 | Thailand | 0 |
| 26 | US Virgin Islands | 0 |
| 27 | United Arab Emirates | 0 |
| 28 | United Kingdom | 0 |
| 29 | United States | 0 |
df_weighted['status'].unique()
array(['jurisdiction', 'intermediary_base', 'intermediary_jurisdiction',
'officer_base', 'officer_jurisdiction'], dtype=object)
# Keep every relationship type except the two intermediary ones, then drop
# the status column.
jurisdiction_list = ['officer_jurisdiction', 'jurisdiction', 'officer_base']
status_mask = df_weighted['status'].isin(jurisdiction_list)
df_weighted_jur = df_weighted[status_mask]
df_jur = df_weighted_jur.drop(columns=['status'])
df_jur
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Hong Kong | Samoa | 2.0 |
| 1 | Hong Kong | Samoa | 2.0 |
| 2 | Hong Kong | Samoa | 2.0 |
| 3 | Hong Kong | Samoa | 2.0 |
| 4 | Hong Kong | Samoa | 2.0 |
| ... | ... | ... | ... |
| 338119 | Indonesia | British Virgin Islands | 1.0 |
| 338120 | Cook Islands | British Virgin Islands | 1.0 |
| 338121 | Hong Kong | British Virgin Islands | 1.0 |
| 338122 | Cook Islands | United Kingdom | 1.0 |
| 338123 | Hong Kong | United Kingdom | 1.0 |
329073 rows × 3 columns
# Collapse duplicate (node_start, node_end) pairs into one row, summing their weights.
edge_keys = ['node_start', 'node_end']
df_jur_grouped = df_jur.groupby(edge_keys, as_index=False).sum()
df_jur_grouped
| node_start | node_end | weight | |
|---|---|---|---|
| 0 | Albania | Bahamas | 4.0 |
| 1 | Algeria | British Virgin Islands | 2.0 |
| 2 | Algeria | United Kingdom | 2.0 |
| 3 | Algeria | United States | 1.0 |
| 4 | Andorra | Bahamas | 52.0 |
| ... | ... | ... | ... |
| 2750 | Zimbabwe | Hong Kong | 3.0 |
| 2751 | Zimbabwe | Jersey | 1.0 |
| 2752 | Zimbabwe | Panama | 6.0 |
| 2753 | Zimbabwe | Russia | 2.0 |
| 2754 | Zimbabwe | United States | 1.0 |
2755 rows × 3 columns
# df_jur_grouped_list = df_jur_grouped.values.tolist()
# df_jur_grouped_list
# d = collections.defaultdict(int)
# for n1, n2, v in df_jur_grouped_list:
# d[min(n1, n2), max(n1, n2)] += v
# result = [[k[0], k[1], v] for k, v in d.items()]
# df_net = pd.DataFrame(result)
# df_net
# df_net = df_net.rename(columns = {0:'node_1', 1:'node_2', 2: 'weight'})
# df_net
#df_net.to_csv('undirected_weighted_grouped.csv', index = False)
# df_net.describe()
# Build the directed no-intermediary graph: one edge per
# (node_start, node_end) row, with the third column stored as 'weight'.
# edge_attrs takes the attribute names for the extra tuple items; the previous
# dict only worked because iterating a dict yields its keys.
g_ni = ig.Graph.TupleList(
    df_jur_grouped.itertuples(index=False),
    directed=True,
    edge_attrs=['weight'],
)
ig.plot(g_ni)
g_ni.diameter()
4
print( "Clustering Coefficient: ", g_ni.transitivity_undirected())
Clustering Coefficient: 0.3829243625911535
np.mean(g_ni.degree())
28.256410256410255
np.mean(g_ni.strength(weights = 'weight'))
5595.635897435897
# Weighted degree (strength) per vertex of g_ni, ranked from highest to lowest.
df_strength = (
    pd.DataFrame(
        {
            'name': list(g_ni.vs['name']),
            'weighted_degree': g_ni.strength(weights='weight'),
        }
    )
    .sort_values('weighted_degree', ascending=False)
    .reset_index(drop=True)
)
df_strength.head(10)
| name | weighted_degree | |
|---|---|---|
| 0 | British Virgin Islands | 283283.5 |
| 1 | Hong Kong | 110569.0 |
| 2 | Panama | 109720.0 |
| 3 | Switzerland | 77116.0 |
| 4 | Bahamas | 40682.0 |
| 5 | United Kingdom | 37265.0 |
| 6 | Taiwan | 33699.5 |
| 7 | Seychelles | 32521.5 |
| 8 | Jersey | 29830.5 |
| 9 | China | 24897.5 |
# Run the cell above first: df_strength is rebuilt per graph and reused here.
ax_strength = df_strength['weighted_degree'].plot()
ax_strength.set_title('Weighted Degree Distribution (No Intermediary Relationships)')
ax_strength.set_xlabel('Countries')
ax_strength.set_ylabel('Weighted Degree')
Text(0, 0.5, 'Weighted Degree')
# Weighted in-degree per vertex of g_ni, ranked from highest to lowest.
df_indegree = (
    pd.DataFrame(
        {
            'name': list(g_ni.vs['name']),
            'weighted_indegree': g_ni.strength(weights='weight', mode='in'),
        }
    )
    .sort_values('weighted_indegree', ascending=False)
    .reset_index(drop=True)
)
df_indegree.head(10)
| name | weighted_indegree | |
|---|---|---|
| 0 | British Virgin Islands | 276368.0 |
| 1 | Panama | 89709.0 |
| 2 | Seychelles | 31433.5 |
| 3 | Bahamas | 31079.0 |
| 4 | Niue | 18912.0 |
| 5 | United Kingdom | 15691.0 |
| 6 | Hong Kong | 13566.0 |
| 7 | Samoa | 12487.5 |
| 8 | Russia | 10494.0 |
| 9 | British Anguilla | 6480.0 |
# Run the cell above first: df_indegree is rebuilt per graph and reused here.
ax_indegree = df_indegree['weighted_indegree'].plot()
ax_indegree.set_title('Weighted In Degree Distribution (No Intermediary Relationships)')
ax_indegree.set_xlabel('Countries')
ax_indegree.set_ylabel('Weighted In Degree')
Text(0, 0.5, 'Weighted In Degree')
# Weighted out-degree per vertex of g_ni, ranked from highest to lowest.
df_outdegree = (
    pd.DataFrame(
        {
            'name': list(g_ni.vs['name']),
            'weighted_outdegree': g_ni.strength(weights='weight', mode='out'),
        }
    )
    .sort_values('weighted_outdegree', ascending=False)
    .reset_index(drop=True)
)
df_outdegree.head(10)
| name | weighted_outdegree | |
|---|---|---|
| 0 | Hong Kong | 97003.0 |
| 1 | Switzerland | 76895.0 |
| 2 | Taiwan | 33699.5 |
| 3 | Jersey | 29417.5 |
| 4 | China | 22343.5 |
| 5 | Luxembourg | 21765.0 |
| 6 | United Kingdom | 21574.0 |
| 7 | Panama | 20011.0 |
| 8 | Singapore | 16349.5 |
| 9 | United States | 15964.5 |
# Run the cell above first: df_outdegree is rebuilt per graph and reused here.
ax_outdegree = df_outdegree['weighted_outdegree'].plot()
ax_outdegree.set_title('Weighted Out Degree Distribution (No Intermediary Relationships)')
ax_outdegree.set_xlabel('Countries')
ax_outdegree.set_ylabel('Weighted Out Degree')
Text(0, 0.5, 'Weighted Out Degree')
# Weighted PageRank per vertex of g_ni, ranked from highest to lowest.
df_pagerank = (
    pd.DataFrame(
        {
            'name': list(g_ni.vs['name']),
            'pagerank': g_ni.pagerank(weights='weight'),
        }
    )
    .sort_values('pagerank', ascending=False)
    .reset_index(drop=True)
)
df_pagerank.head(10)
| name | pagerank | |
|---|---|---|
| 0 | British Virgin Islands | 0.263850 |
| 1 | Panama | 0.128837 |
| 2 | Barbados | 0.080968 |
| 3 | Bahamas | 0.050375 |
| 4 | Hong Kong | 0.046558 |
| 5 | United Kingdom | 0.046447 |
| 6 | Seychelles | 0.037883 |
| 7 | Russia | 0.031472 |
| 8 | Niue | 0.028551 |
| 9 | Bermuda | 0.016875 |
# Run the cell above first: df_pagerank is rebuilt per graph and reused here.
ax_pagerank = df_pagerank['pagerank'].plot()
ax_pagerank.set_title('Pagerank Distribution (No Intermediary Relationships)')
ax_pagerank.set_xlabel('Countries')
ax_pagerank.set_ylabel('Pagerank')
Text(0, 0.5, 'Pagerank')
# Weighted authority score per vertex of g_ni, ranked from highest to lowest.
df_authorities = (
    pd.DataFrame(
        {
            'name': list(g_ni.vs['name']),
            'authority_score': g_ni.authority_score(weights='weight'),
        }
    )
    .sort_values('authority_score', ascending=False)
    .reset_index(drop=True)
)
df_authorities.head(10)
| name | authority_score | |
|---|---|---|
| 0 | British Virgin Islands | 1.000000 |
| 1 | Panama | 0.207994 |
| 2 | Seychelles | 0.103974 |
| 3 | Bahamas | 0.070005 |
| 4 | Samoa | 0.063105 |
| 5 | Niue | 0.056301 |
| 6 | United Kingdom | 0.039640 |
| 7 | British Anguilla | 0.027027 |
| 8 | Russia | 0.026683 |
| 9 | Hong Kong | 0.015784 |
# Run the cell above first: df_authorities is rebuilt per graph and reused here.
ax_authority = df_authorities['authority_score'].plot()
ax_authority.set_title('Authority Score Distribution (No Intermediary Relationships)')
ax_authority.set_xlabel('Countries')
ax_authority.set_ylabel('Authority Score')
Text(0, 0.5, 'Authority Score')
# Label-propagation community detection on g_ni, using the 'weight' edge attribute.
comm_lp_ni = g_ni.community_label_propagation(weights='weight')
ig.plot(comm_lp_ni, mark_groups=True, layout="drl")
# Weighted modularity of the resulting partition.
modularity = g_ni.modularity(comm_lp_ni, weights='weight')
print(modularity)
0.0012668163862697814
# Print the per-community summary metrics (helpers defined elsewhere in the notebook).
for metric_label, metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(metric_label, metric_fn(comm_lp_ni))
average weighted degree: [0.0, 13317.28, 0.0, 0.0, 84.0, 0.0, 0.0, 18.0, 0.0, 0.0, 230.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 44.16, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.6641905218336989, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 3, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Leading-eigenvector community detection on g_ni, using the 'weight' edge attribute.
# NOTE(review): igraph warns that this method was developed for undirected
# graphs, while g_ni is built with directed=True - confirm that this is acceptable.
comm_leading_eigenvector_ni = g_ni.community_leading_eigenvector(weights='weight')
ig.plot(comm_leading_eigenvector_ni, mark_groups=True, layout="drl")
/Users/susankoruthu/opt/anaconda3/lib/python3.8/site-packages/igraph/__init__.py:1281: RuntimeWarning: This method was developed for undirected graphs at src/community/leading_eigenvector.c:530 membership, _, q = GraphBase.community_leading_eigenvector(
# Weighted modularity of the leading-eigenvector partition of g_ni.
modularity = g_ni.modularity(
    comm_leading_eigenvector_ni,
    weights='weight',
)
print(modularity)
0.14956936235767823
# Report the community-level metrics of the leading-eigenvector partition.
# Each helper (defined earlier in the notebook) is called with the clustering
# and its result is printed unchanged after the corresponding label.
for _metric_label, _metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(_metric_label, _metric_fn(comm_leading_eigenvector_ni))
average weighted degree: [3397.7586206896553, 4895.692307692308, 0.0, 0.0, 0.0, 1.0, 0.0, 16.0, 0.0] average degree: [8.206896551724139, 23.53846153846154, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0] transitivity: [0.27990271943400397, 0.44260359931244436, nan, nan, nan, nan, nan, nan, nan] diameter: [3, 3, 0, 0, 0, 1, 0, 0, 0]
# One row per vertex: its 'name' attribute and the community id assigned by
# the leading-eigenvector clustering.
df_leading_eigenvector_ni = pd.DataFrame(
    {
        'name': list(g_ni.vs['name']),
        'community': comm_leading_eigenvector_ni.membership,
    }
)
# Trailing bare expression so the notebook renders the table.
df_leading_eigenvector_ni
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Algeria | 1 |
| 3 | British Virgin Islands | 1 |
| 4 | United Kingdom | 1 |
| ... | ... | ... |
| 190 | Venezuela | 0 |
| 191 | Vietnam | 1 |
| 192 | Yemen | 0 |
| 193 | Zambia | 1 |
| 194 | Zimbabwe | 1 |
195 rows × 2 columns
# Summary statistics of the numeric 'community' column (the community ids).
df_leading_eigenvector_ni.describe()
| community | |
|---|---|
| count | 195.000000 |
| mean | 0.928205 |
| std | 1.076925 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 1.000000 |
| 75% | 1.000000 |
| max | 8.000000 |
## Plot the leading-eigenvector communities on the full graph
# Colour every vertex by looking its community id up in `colors`.
vertex_colors = [
    colors[community_id]
    for community_id in comm_leading_eigenvector_ni.membership
]

# Styling shared by both renderings below.
visual_style = {
    # Canvas size and margin
    "bbox": (5000, 5000),
    "margin": 17,
    # Per-vertex community colours
    "vertex_color": vertex_colors,
    # Edge width: natural log of the edge weight, shifted up by 1
    "edge_width": np.log(g_ni.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    # Vertex and label appearance
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Draw straight edges
    "edge_curved": False,
}

# Rendering 1: Fruchterman-Reingold layout
my_layout = g_ni.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(
    g_ni,
    'directed_all_jur_images/communities_leading_eigenvector.png',
    vertex_label=g_ni.vs['name'],
    **visual_style,
)

# Rendering 2: circular layout (same styling, only the layout changes)
my_layout = g_ni.layout_circle()
visual_style["layout"] = my_layout
ig.plot(
    g_ni,
    'directed_all_jur_images/leading_vector_circular.png',
    vertex_label=g_ni.vs['name'],
    **visual_style,
)
# Render every leading-eigenvector community as its own image.
#
# subgraphs() builds a new list of Graph objects each time it is called, so it
# is evaluated once here instead of once for the loop bound and several more
# times inside every iteration.
community_subgraphs = comm_leading_eigenvector_ni.subgraphs()

# Settings that are identical for every community image. They overwrite the
# corresponding entries of the existing visual_style dict; all other entries
# already in visual_style are reused unchanged.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'

for i, community_subgraph in enumerate(community_subgraphs):
    # Edge width: natural log of the edge weight, shifted up by 1
    visual_style["edge_width"] = np.log(community_subgraph.es['weight']) + 1
    # Fresh Fruchterman-Reingold layout for this community only
    my_layout = community_subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    # One PNG per community, numbered by community index
    ig.plot(
        community_subgraph,
        f'directed_all_jur_images/leading_eigenvector_subgraph{i}.png',
        vertex_label=community_subgraph.vs['name'],
        **visual_style,
    )
# Spinglass community detection on g_ni, using the 'weight' edge attribute as
# edge weights.
# NOTE(review): no random seed is set in this cell; if the spinglass search is
# randomised, the partition (and every number derived from it below) may
# differ between runs - confirm and seed if reproducibility matters.
comm_spinglass_ni = g_ni.community_spinglass(weights = 'weight')
# Draw the clustering with community groups highlighted, using the "drl" layout.
ig.plot(comm_spinglass_ni, mark_groups = True, layout="drl")
# Modularity of the spinglass partition on g_ni, computed with the 'weight'
# edge attribute; the value is stored in `modularity` and printed.
modularity = g_ni.modularity(comm_spinglass_ni, weights='weight')
print(modularity)
0.054326644420026556
# Report the community-level metrics of the spinglass partition.
# Each helper (defined earlier in the notebook) is called with the clustering
# and its result is printed unchanged after the corresponding label.
for _metric_label, _metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(_metric_label, _metric_fn(comm_spinglass_ni))
average weighted degree: [1283.1864406779662, 1465.157894736842, 798.6315789473684, 2070.9411764705883, 1798.0, 126.42857142857143, 313.6, 844.0, 230.0] average degree: [3.4237288135593222, 4.947368421052632, 11.631578947368421, 3.823529411764706, 4.2727272727272725, 2.4285714285714284, 2.0, 1.0, 1.0] transitivity: [0.06727099236641221, 0.39, 0.4916555407209613, 0.1836734693877551, 0.36401673640167365, 0.11392405063291139, 0.0, nan, nan] diameter: [3, 3, 3, 4, 3, 2, 1, 1, 1]
# One row per vertex: its 'name' attribute and the community id assigned by
# the spinglass clustering.
df_spinglass_ni = pd.DataFrame(
    {
        'name': list(g_ni.vs['name']),
        'community': comm_spinglass_ni.membership,
    }
)
# Trailing bare expression so the notebook renders the table.
df_spinglass_ni
| name | community | |
|---|---|---|
| 0 | Albania | 1 |
| 1 | Bahamas | 1 |
| 2 | Algeria | 2 |
| 3 | British Virgin Islands | 0 |
| 4 | United Kingdom | 2 |
| ... | ... | ... |
| 190 | Venezuela | 3 |
| 191 | Vietnam | 0 |
| 192 | Yemen | 3 |
| 193 | Zambia | 0 |
| 194 | Zimbabwe | 0 |
195 rows × 2 columns
# Summary statistics of the numeric 'community' column (the community ids).
df_spinglass_ni.describe()
| community | |
|---|---|
| count | 195.000000 |
| mean | 2.128205 |
| std | 1.901972 |
| min | 0.000000 |
| 25% | 0.000000 |
| 50% | 2.000000 |
| 75% | 3.000000 |
| max | 8.000000 |
## Plot the spinglass communities on the full graph
# Colour every vertex by looking its community id up in `colors`.
vertex_colors = [
    colors[community_id]
    for community_id in comm_spinglass_ni.membership
]

# Styling shared by both renderings below.
visual_style = {
    # Canvas size and margin
    "bbox": (5000, 5000),
    "margin": 17,
    # Per-vertex community colours
    "vertex_color": vertex_colors,
    # Edge width: natural log of the edge weight, shifted up by 1
    "edge_width": np.log(g_ni.es['weight']) + 1,
    "edge_color": "rgba(1,1,1,0.1)",
    # Vertex and label appearance
    "vertex_size": 15,
    "vertex_label_size": 20,
    "vertex_label_color": 'red',
    # Draw straight edges
    "edge_curved": False,
}

# Rendering 1: Fruchterman-Reingold layout
my_layout = g_ni.layout_fruchterman_reingold()
visual_style["layout"] = my_layout
ig.plot(
    g_ni,
    'directed_all_jur_images/communities_spinglass.png',
    vertex_label=g_ni.vs['name'],
    **visual_style,
)

# Rendering 2: circular layout (same styling, only the layout changes)
my_layout = g_ni.layout_circle()
visual_style["layout"] = my_layout
ig.plot(
    g_ni,
    'directed_all_jur_images/spinglass_circular.png',
    vertex_label=g_ni.vs['name'],
    **visual_style,
)
# Render every spinglass community as its own image.
#
# subgraphs() builds a new list of Graph objects each time it is called, so it
# is evaluated once here instead of once for the loop bound and several more
# times inside every iteration.
community_subgraphs = comm_spinglass_ni.subgraphs()

# Settings that are identical for every community image. They overwrite the
# corresponding entries of the existing visual_style dict; all other entries
# already in visual_style are reused unchanged.
visual_style["bbox"] = (2000, 2000)
visual_style["margin"] = 17
visual_style["vertex_color"] = 'green'

for i, community_subgraph in enumerate(community_subgraphs):
    # Edge width: natural log of the edge weight, shifted up by 1
    visual_style["edge_width"] = np.log(community_subgraph.es['weight']) + 1
    # Fresh Fruchterman-Reingold layout for this community only
    my_layout = community_subgraph.layout_fruchterman_reingold()
    visual_style["layout"] = my_layout
    # One PNG per community, numbered by community index
    ig.plot(
        community_subgraph,
        f'directed_all_jur_images/spinglass_subgraph{i}.png',
        vertex_label=community_subgraph.vs['name'],
        **visual_style,
    )
# Walktrap community detection on g_ni with steps=2, using the 'weight' edge
# attribute as edge weights. The call's result is kept in walktrap_ni.
walktrap_ni = g_ni.community_walktrap(weights = 'weight', steps = 2)
# Convert the walktrap result to a clustering; no explicit number of clusters
# is passed, so the library's default choice is used.
comm_walktrap_ni = walktrap_ni.as_clustering()
# Draw the clustering with community groups highlighted, using the "drl" layout.
ig.plot(comm_walktrap_ni, mark_groups = True, layout="drl")
# Modularity of the walktrap partition on g_ni, computed with the 'weight'
# edge attribute; the value is stored in `modularity` and printed.
modularity = g_ni.modularity(comm_walktrap_ni, weights='weight')
print(modularity)
0.002879845289073743
# Report the community-level metrics of the walktrap partition.
# Each helper (defined earlier in the notebook) is called with the clustering
# and its result is printed unchanged after the corresponding label.
for _metric_label, _metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(_metric_label, _metric_fn(comm_walktrap_ni))
average weighted degree: [0.0, 8642.008064516129, 0.0, 0.0, 54.285714285714285, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] average degree: [0.0, 37.903225806451616, 0.0, 0.0, 1.7142857142857142, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] transitivity: [nan, 0.4614273777519436, nan, nan, 0.0, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan] diameter: [0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# One row per vertex: its 'name' attribute and the community id assigned by
# the walktrap clustering.
df_walktrap_ni = pd.DataFrame(
    {
        'name': list(g_ni.vs['name']),
        'community': comm_walktrap_ni.membership,
    }
)
# Trailing bare expression so the notebook renders the table.
df_walktrap_ni
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 1 |
| 2 | Algeria | 2 |
| 3 | British Virgin Islands | 1 |
| 4 | United Kingdom | 1 |
| ... | ... | ... |
| 190 | Venezuela | 1 |
| 191 | Vietnam | 1 |
| 192 | Yemen | 63 |
| 193 | Zambia | 64 |
| 194 | Zimbabwe | 65 |
195 rows × 2 columns
# Summary statistics of the numeric 'community' column (the community ids).
df_walktrap_ni.describe()
| community | |
|---|---|
| count | 195.000000 |
| mean | 11.753846 |
| std | 18.576144 |
| min | 0.000000 |
| 25% | 1.000000 |
| 50% | 1.000000 |
| 75% | 16.500000 |
| max | 65.000000 |
# ##plot community
# visual_style = {}
# vertex_colors = []
# for i in range(len(comm_walktrap_ni.membership)):
# vertex_colors.append(colors[comm_walktrap_ni.membership[i]])
# # Set bbox and margin
# visual_style["bbox"] = (5000,5000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = vertex_colors
# # Set edge width
# visual_style["edge_width"] = np.log(g_ni.es['weight']) + 1
# # Set edge color
# visual_style["edge_color"] = "rgba(1,1,1,0.1)"
# # Set vertex size
# visual_style["vertex_size"] = 15
# # Set vertex label size
# visual_style["vertex_label_size"] = 20
# # Set vertex label color
# visual_style["vertex_label_color"] = 'red'
# # Don't curve the edges
# visual_style["edge_curved"] = False
# # Set the layout
# my_layout = g_ni.layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_ni,'directed_all_jur_images/communities_walktrap.png', vertex_label = g_ni.vs['name'] , **visual_style)
# # Set the layout
# my_layout = g_ni.layout_circle()
# visual_style["layout"] = my_layout
# # Plot the graph
# ig.plot(g_ni,'directed_all_jur_images/walktrap_circular.png', vertex_label = g_ni.vs['name'] , **visual_style)
# for i in range(len(comm_walktrap_ni.subgraphs())):
# visual_style["bbox"] = (2000,2000)
# visual_style["margin"] = 17
# # Set vertex colours
# visual_style["vertex_color"] = 'green'
# # Set edge width
# visual_style["edge_width"] = np.log(comm_walktrap_ni.subgraphs()[i].es['weight']) + 1
# # Set the layout
# my_layout = comm_walktrap_ni.subgraphs()[i].layout_fruchterman_reingold()
# visual_style["layout"] = my_layout
# ig.plot(comm_walktrap_ni.subgraphs()[i],'directed_all_jur_images/walktrap_subgraph' + str(i) + '.png' ,vertex_label = comm_walktrap_ni.subgraphs()[i].vs['name'],**visual_style )
# Infomap community detection on g_ni, using the 'weight' edge attribute as
# edge weights (note the keyword here is edge_weights, not weights).
comm_infomap_ni = g_ni.community_infomap(edge_weights = 'weight')
# Draw the clustering with community groups highlighted, using the "drl" layout.
ig.plot(comm_infomap_ni, mark_groups = True, layout="drl")
# Modularity of the infomap partition on g_ni, computed with the 'weight'
# edge attribute; the value is stored in `modularity` and printed.
modularity = g_ni.modularity(comm_infomap_ni, weights='weight')
print(modularity)
0.0
# Report the community-level metrics of the infomap partition.
# Each helper (defined earlier in the notebook) is called with the clustering
# and its result is printed unchanged after the corresponding label.
for _metric_label, _metric_fn in (
    ('average weighted degree: ', get_avg_weighted_degree),
    ('average degree: ', get_avg_degree),
    ('transitivity: ', get_transitivity),
    ('diameter: ', get_diameter),
):
    print(_metric_label, _metric_fn(comm_infomap_ni))
average weighted degree: [5595.635897435897] average degree: [28.256410256410255] transitivity: [0.3829243625911535] diameter: [4]
# One row per vertex: its 'name' attribute and the community id assigned by
# the infomap clustering.
df_infomap_ni = pd.DataFrame(
    {
        'name': list(g_ni.vs['name']),
        'community': comm_infomap_ni.membership,
    }
)
# Trailing bare expression so the notebook renders the table.
df_infomap_ni
| name | community | |
|---|---|---|
| 0 | Albania | 0 |
| 1 | Bahamas | 0 |
| 2 | Algeria | 0 |
| 3 | British Virgin Islands | 0 |
| 4 | United Kingdom | 0 |
| ... | ... | ... |
| 190 | Venezuela | 0 |
| 191 | Vietnam | 0 |
| 192 | Yemen | 0 |
| 193 | Zambia | 0 |
| 194 | Zimbabwe | 0 |
195 rows × 2 columns